Project description

Effect of cohesin looping on genome architecture, from the perspective of the nuclear lamina.

Introduction

In a previous document, I classified LAD borders as 1) CTCF-bound or not 2) shared or unique. Here, I will visualize what the effect is of CTCF depletion on the various types of LAD borders. Also, the effect of the other depletions.

Method

Load (z-scale) DamID tracks and plot effect on different types of LAD borders.

Set-up

Load the libraries and set the parameters.

# Load dependencies
suppressPackageStartupMessages(library(tidyverse))
suppressPackageStartupMessages(library(GenomicRanges))
suppressPackageStartupMessages(library(rtracklayer))
suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(caTools))
suppressPackageStartupMessages(library(yaml))
suppressPackageStartupMessages(library(M3C))
suppressPackageStartupMessages(library(ggbeeswarm))
suppressPackageStartupMessages(library(broom))
suppressPackageStartupMessages(library(corrr))

bin.size <- "10kb"
damid.dir <- file.path("../results_NQ/normalized/", paste0("bin-", bin.size))

# Prepare output 
output.dir <- "ts220113_effect_of_CTCF_depletion_on_LAD_borders"
dir.create(output.dir, showWarnings = FALSE)


# Prepare input
input.dir <- "ts220113_CTCF_enrichment_at_LAD_borders"
metadata.borders <- readRDS(file.path(input.dir, "metadata.rds"))
LADs <- readRDS(file.path(input.dir, "LADs.rds")) 
LAD.borders <- readRDS(file.path(input.dir, "LAD_borders.rds"))
CTCF.sites <- readRDS(file.path(input.dir, "CTCF_sites.rds"))

LADs <- readRDS(file.path(input.dir, "LADs_pA.rds"))
LAD.borders <- readRDS(file.path(input.dir, "LAD_borders_pA.rds"))

borders <- LAD.borders[["mESC_pA_PT"]]
borders$class <- "xxx"

Prepare knitr output.

library(knitr)
opts_chunk$set(fig.width = 10, fig.height = 4, cache = T,
               message = F, warning = F,
               dev=c('png', 'pdf'), fig.path = file.path(output.dir, "figures/")) 
pdf.options(useDingbats = FALSE)

List functions.

LoadDamID <- function(metadata, damid.dir, column = "file") {
  
  # Load data
  tib <- purrr::reduce(lapply(1:nrow(metadata),
                              function(i) {
                                f <- metadata[i, ] %>% pull(column)
                                s <- as.character(metadata$sample)[i]
                                tmp <- read_tsv(file.path(damid.dir, f), 
                                                col_names = c("seqnames", "start", "end", s))
                              }),
                       full_join)
  
  # Convert to GRanges
  tib$start <- tib$start + 1
  gr <- as(tib, "GRanges")
  
  # Filter chromosomes
  gr <- gr[seqnames(gr) %in% c(paste0("chr", 1:22), "chrX")]
  gr
  
}

LoadDamIDCounts <- function(metadata, damid.dir.counts, 
                            yaml = "../bin/snakemake/config.yaml") {
  
  # List samples from yaml file
  yaml_list <- read_yaml(yaml)
  
  # Read lmnb1 counts
  idx <- which(names(yaml_list$dam_controls) %in% unlist(yaml_list$replicates))
  
  lmnb1_files <- names(yaml_list$dam_controls)[idx]
  dam_files <- unlist(yaml_list$dam_controls)[idx]
  
  # Prepare counts metadata
  metadata_counts <- tibble(lmnb1 = lmnb1_files,
                            dam = dam_files) %>%
    mutate(sample = str_remove(lmnb1, "pADamID_NQ_"),
           sample = str_remove(sample, "pADamID_"),
           sample = str_remove(sample, "-")) %>%
    separate(sample, c("condition", "timepoint", "replicate"), remove = F) %>%
    mutate(idx = match(paste0(condition, timepoint),
                       paste0(metadata$condition, metadata$timepoint)),
           group = metadata$sample[idx],
           lmnb1 = paste0(lmnb1, "-10kb.counts.txt.gz"),
           dam = paste0(dam, "-10kb.counts.txt.gz")) %>%
    arrange(idx, replicate)
  
  # Read lmnb1 files
  gr_lmnb1 <- LoadDamID(metadata_counts, damid.dir.counts, column = "lmnb1")
  
  # CPM and mean of replicates
  tib <- as_tibble(mcols(gr_lmnb1)) %>%
    mutate_all(function(x) x / sum(x) * 1e6) %>%
    add_column(bin = 1:nrow(.)) %>%
    gather(key, value, -bin) %>%
    mutate(sample = metadata_counts$group[match(key, metadata_counts$sample)]) %>%
    group_by(bin, sample) %>%
    summarise(mean = mean(value)) %>%
    ungroup() %>%
    spread(sample, mean) %>%
    dplyr::select(-bin)
  
  mcols(gr_lmnb1) <- tib
  
  # Read dam files
  gr_dam <- LoadDamID(metadata_counts, damid.dir.counts, column = "dam")
  
  # CPM and mean of replicates
  tib <- as_tibble(mcols(gr_dam)) %>%
    mutate_all(function(x) x / sum(x) * 1e6) %>%
    add_column(bin = 1:nrow(.)) %>%
    gather(key, value, -bin) %>%
    mutate(sample = metadata_counts$group[match(key, metadata_counts$sample)]) %>%
    group_by(bin, sample) %>%
    summarise(mean = mean(value)) %>%
    ungroup() %>%
    spread(sample, mean) %>%
    dplyr::select(-bin)
  
  mcols(gr_dam) <- tib
  
  list(gr_lmnb1, gr_dam)
  
}

PlotDensity <- function(damid, title, xlimits = NULL, replicate = F, conditions = NULL) {
  
  if (replicate) {
    tib <- as_tibble(mcols(damid)) %>%
      gather() %>%
      separate(key, c("condition", "timepoint", "replicate"), remove = F, sep = "_") %>%
      mutate(condition = factor(condition, levels = levels(metadata.damid$condition)),
             timepoint = factor(timepoint, levels = levels(metadata.damid$timepoint)),
             replicate = factor(replicate, levels = paste0("r", 1:10))) %>%
      drop_na()
  } else {
    tib <- as_tibble(mcols(damid)) %>%
      gather() %>%
      separate(key, c("condition", "timepoint"), remove = F, sep = "_") %>%
      mutate(condition = factor(condition, levels = levels(metadata.damid$condition)),
             timepoint = factor(timepoint, levels = levels(metadata.damid$timepoint))) %>%
      drop_na()
  }
  
  nrow <- 2
  if (! is.null(conditions)) {
    tib <- tib %>%
      filter(condition %in% conditions)
    nrow <- 1
  }
  
  plt <- tib %>%
    ggplot(aes(x = value, col = timepoint)) +
    #geom_density() +
    facet_wrap( ~ condition, nrow = nrow) +
    ggtitle(title) +
    xlab("DamID score") +
    ylab("Density") +
    scale_color_brewer(palette = "Dark2") +
    theme_bw() +
    theme(aspect.ratio = 1)
  
  if (replicate) {
    plt <- plt + geom_density(aes(linetype = replicate))
  } else {
    plt <- plt + geom_density()
  }
  
  if (! is.null(xlimits)) {
    plt <- plt +
      coord_cartesian(xlim = xlimits)
  }
  
  plt
  
}

ScaleDamID <- function(damid) {
  tib <- as_tibble(mcols(damid))
  tib.scaled <- as_tibble(scale(tib))
  mcols(damid) <- tib.scaled
  damid
}

grMid <- function(gr) {
  start(gr) <- end(gr) <- rowMeans(cbind(start(gr), end(gr)))
  gr
}

DistanceToLADBorder <- function(sites, borders, nearest = T, border.class = F) {
  # Given a GRanges of damid and LAD borders, calculate the 
  # distance to the preceding and following LAD. Return a new GRanges
  
  # Make sure the chromosomes are as they should be - especially for the bins
  sites <- sites[seqnames(sites) %in% c(paste0("chr", 1:22),
                                        "chrX")]
  borders <- borders[seqnames(borders) %in% c(paste0("chr", 1:22),
                                              "chrX")]
  
  # Preceding distance
  sites$dis.precede <- sites$strand.precede <- 
    sites$border.class.precede <- sites$border.ctcf.precede <- NA
  idx.precede <- precede(sites, borders, ignore.strand = T, select = "all")
  sites$dis.precede[queryHits(idx.precede)] <- distance(sites[queryHits(idx.precede)], 
                                                        borders[subjectHits(idx.precede)], 
                                                        ignore.strand = T)
  sites$strand.precede[queryHits(idx.precede)] <- strand(borders[subjectHits(idx.precede)])
  
  sites$border.class.precede[queryHits(idx.precede)] <- borders$class[subjectHits(idx.precede)]
  sites$border.ctcf.precede[queryHits(idx.precede)] <- borders$CTCF[subjectHits(idx.precede)]
  
  
  # Following distance
  sites$dis.follow <- sites$strand.follow <- 
    sites$border.class.follow <- sites$border.ctcf.follow <- NA
  idx.follow <- follow(sites, borders, ignore.strand = T, select = "all")
  sites$dis.follow[queryHits(idx.follow)] <- distance(sites[queryHits(idx.follow)], 
                                                      borders[subjectHits(idx.follow)], 
                                                      ignore.strand = T)
  sites$strand.follow[queryHits(idx.follow)] <- strand(borders[subjectHits(idx.follow)])
  
  sites$border.class.follow[queryHits(idx.follow)] <- borders$class[subjectHits(idx.follow)]
  sites$border.ctcf.follow[queryHits(idx.follow)] <- borders$CTCF[subjectHits(idx.follow)]
  
  
  # Exception: overlapping (follow = distance (0), precede = NA)
  idx.overlap <- findOverlaps(sites, borders, ignore.strand = T)
  sites$dis.follow[queryHits(idx.overlap)] <- distance(sites[queryHits(idx.overlap)], 
                                                       borders[subjectHits(idx.overlap)], 
                                                       ignore.strand = T)
  sites$dis.precede[queryHits(idx.overlap)] <- NA
  sites$strand.follow[queryHits(idx.overlap)] <- sites$strand.precede[queryHits(idx.overlap)] <- 
    strand(borders[subjectHits(idx.overlap)])
  
  sites$border.class.precede[queryHits(idx.overlap)] <- 
    sites$border.class.follow[queryHits(idx.overlap)] <-
    borders$class[subjectHits(idx.overlap)]
  sites$border.ctcf.precede[queryHits(idx.overlap)] <- 
    sites$border.ctcf.follow[queryHits(idx.overlap)] <-
    borders$CTCF[subjectHits(idx.overlap)]
  
  
  # Alternative: only use information from the nearest hit
  if (nearest) {
    # Remove precede information if follow is smaller
    idx.remove.precede <- which(sites$dis.follow < sites$dis.precede)
    sites$dis.precede[idx.remove.precede] <- NA
    # Remove follow information if precede is smaller
    idx.remove.follow <- which(sites$dis.follow > sites$dis.precede)
    sites$dis.follow[idx.remove.follow] <- NA
  }
  
  sites
  
}

CountPerBins <- function(sites, bin.size = 10000) {
  
  tib <- as_tibble(mcols(sites)) %>%
    add_column(number = 1:nrow(.)) %>%
    mutate(dis.precede.group = as.numeric(cut(dis.precede, 
                                              breaks = seq(0, max(dis.precede, na.rm = T) + 1, 
                                                           by = bin.size))) - 1,
           dis.follow.group = as.numeric(cut(dis.follow, 
                                             breaks = seq(0, max(dis.follow, na.rm = T) + 1, 
                                                          by = bin.size))) - 1) %>%
    dplyr::select(-dis.precede, -dis.follow) %>%
    mutate(dis.precede.group = ifelse(strand.precede == "+", 
                                      -dis.precede.group, dis.precede.group),
           dis.follow.group = ifelse(strand.follow == "+", 
                                     dis.follow.group, -dis.follow.group)) %>%
    dplyr::select(-strand.precede, -strand.follow) %>%
    gather(key, value, dis.precede.group, dis.follow.group) %>%
    drop_na() %>%
    mutate(border.class = ifelse(key == "dis.precede.group", 
                                 border.class.precede, border.class.follow),
           border.ctcf = ifelse(key == "dis.precede.group", 
                                border.ctcf.precede, border.ctcf.follow)) %>%
    dplyr::select(-border.class.precede, -border.class.follow, 
                  -border.ctcf.precede, -border.ctcf.follow) %>%
    gather(sample, score, -number, -key, -value, -border.class, -border.ctcf) %>%
    group_by(value, border.class, border.ctcf, sample) %>%
    summarise(count = n(),
              mean = mean(score)) %>%
    ungroup() %>%
    mutate(sample = factor(sample, levels = levels(metadata.damid$sample)),
           border.ctcf = factor(border.ctcf, levels = c("nonCTCF", "CTCF")),
           border.class = factor(border.class, levels = c("shared", "unique"))) %>%
    arrange(sample, border.class, border.ctcf)
  
  tib
  
}

PlotDamIDScoresAndDifferences <- function(damid.summary, 
                                          xlimits = c(-0.4, 0.4),
                                          ylimits.diff = c(-0.3, 0.2),
                                          extra_grouping = NULL) {
  tib <- damid.summary %>%
    mutate(mean = runmean(mean, k = 5)) %>%
    group_by_at(c("sample", "value", extra_grouping)) %>%
    summarise(combined.count = sum(count),
              combined.mean = weighted.mean(mean, count)) %>%
    filter(combined.count > 20) %>%
    separate(sample, c("condition", "timepoint"), remove = F) %>%
    mutate(condition = factor(condition, levels = levels(metadata.damid$condition)),
           timepoint = factor(timepoint, levels = levels(metadata.damid$timepoint))) %>%
    ungroup()
  
  plt <- tib %>%
    ggplot(aes(x = value / 100, y = combined.mean, col = border.ctcf)) +
    annotate("rect", xmin = 0, xmax = 1e3, ymin = -1, ymax = 5, 
             fill = "grey", alpha = 0.3) +  
    geom_hline(yintercept = 0, col = "black", linetype = "dashed") +
    geom_vline(xintercept = 0, col = "black", linetype = "dashed") +
    geom_line(size = 1) +
    facet_grid(as.formula(paste(paste(c("condition"), 
                                      collapse = " + "), 
                                "~", "timepoint"))) +
    ggtitle("DamID scores at LAD borders") +
    xlab("Distance from LAD border (Mb)") +
    ylab("DamID (z-score)") +
    coord_cartesian(xlim = xlimits, ylim = c(-1, 1)) +
    scale_color_manual(values = c("blue", "red")) +
    theme_bw() +
    theme(aspect.ratio = 1,
          axis.text.x = element_text(angle = 90, hjust = 1))
  plot(plt)
  
  # Also, plot the difference between 0h and the others
  tib.difference <- tib %>%
    dplyr::select(-sample, -combined.count) %>%
    spread(key = timepoint, value = combined.mean) %>%
    mutate(diff.6h = `6h` - `0h`,
           diff.24h = `24h` - `0h`,
           diff.96h = `96h` - `0h`) %>%
    dplyr::select(-`0h`, -`6h`, -`24h`, -`96h`) %>%
    gather(timepoint, combined.difference, diff.6h, diff.24h, diff.96h) %>%
    mutate(timepoint = factor(timepoint, levels = c("diff.6h", "diff.24h", "diff.96h")))
  
  plt <- tib.difference %>%
    ggplot(aes(x = value / 100, y = combined.difference, col = border.ctcf)) +
    annotate("rect", xmin = 0, xmax = 1e3, ymin = -1, ymax = 5, 
             fill = "grey", alpha = 0.3) +  
    geom_hline(yintercept = 0, col = "black", linetype = "dashed") +
    geom_vline(xintercept = 0, col = "black", linetype = "dashed") +
    geom_line(size = 1) +
    facet_grid(as.formula(paste(paste(c("condition"), 
                                      collapse = " + "), 
                                "~", "timepoint"))) +
    ggtitle("DamID difference at LAD borders") +
    xlab("Distance from LAD border (Mb)") +
    ylab("DamID difference (z-score)") +
    coord_cartesian(xlim = xlimits, ylim = ylimits.diff) +
    scale_color_manual(values = c("blue", "red")) +
    theme_bw() +
    theme(aspect.ratio = 1,
          axis.text.x = element_text(angle = 90, hjust = 1))
  plot(plt)
  
  # Also, plot the difference between CTCF and non-CTCF borders
  tib.difference <- tib %>%
    dplyr::select(-sample, -combined.count) %>%
    spread(key = border.ctcf, value = combined.mean) %>%
    mutate(diff = CTCF - nonCTCF) %>%
    dplyr::select(-nonCTCF, -CTCF)
  
  plt <- tib.difference %>%
    ggplot(aes(x = value / 100, y = diff, col = timepoint)) +
    annotate("rect", xmin = 0, xmax = 1e3, ymin = -1, ymax = 5, 
             fill = "grey", alpha = 0.3) +  
    geom_hline(yintercept = 0, col = "black", linetype = "dashed") +
    geom_vline(xintercept = 0, col = "black", linetype = "dashed") +
    geom_line(size = 1) +
    facet_wrap( ~ condition, nrow = 2) +
    ggtitle("DamID difference at CTCF borders") +
    xlab("Distance from LAD border (Mb)") +
    ylab("DamID difference (z-score)") +
    coord_cartesian(xlim = xlimits, ylim = ylimits.diff) +
    scale_color_brewer(palette = "Set1") +
    theme_bw() +
    theme(aspect.ratio = 1,
          axis.text.x = element_text(angle = 90, hjust = 1))
  plot(plt)
  
}

1. Load data

1.1 Previous data

Load the required data. First, the previous data sets that I need.

# See above

1.2 DamID data

Also, load DamID data. I will work with combined data tracks of replicates.

Also, load the counts. These can be used to illustrate whether any effects are caused by a change in accessibility or Lamin B1 reads.

# Prepare DamID metadata
metadata.damid <- tibble(dir(damid.dir, pattern = "combined.*norm.txt.gz"),
                         .name_repair = ~ c("file")) %>%
  mutate(sample = str_remove(str_remove(file, "pADamID_"),
                             paste0("-", bin.size, ".*"))) %>%
  mutate(sample = str_remove(sample, "^NQ_")) %>%
  mutate(sample = str_replace_all(sample, "-", "")) %>%
  separate(sample, c("condition", "timepoint"), remove = F, sep = "_") %>%
  mutate(condition = factor(condition, levels = c("PT", "CTCFEL", "CTCFNQ", 
                                                  "CTCF", "RAD21", "WAPL", "CTCFWAPL")),
         timepoint = factor(timepoint, levels = c("0h", "6h", "24h", "96h"))) %>%
  arrange(condition, timepoint) %>%
  add_column(cell = factor("mESC")) %>%
  mutate(sample = factor(sample, levels = unique(sample))) %>%
  drop_na()

# Load DamID
damid <- LoadDamID(metadata.damid, damid.dir)

# Load DamID counts
damid.counts <- LoadDamIDCounts(metadata.damid, 
                                str_replace(damid.dir,
                                            "normalized",
                                            "counts"))
damid.lmnb1 <- damid.counts[[1]]
damid.dam <- damid.counts[[2]]

I want to show the effect of scaling using one sample - do that here. Note that this is done on individual replicates.

# Prepare DamID metadata
metadata.scaling <- tibble(file = dir(damid.dir, pattern = ".*norm.txt.gz")) %>%
  filter(! grepl("combined", file)) %>%
  filter(! grepl("RAD21_r9", file)) %>%
  #filter(grepl("CTCF-WAPL", file)) %>%
  mutate(sample = str_remove(str_remove(file, "pADamID_"),
                             paste0("-", bin.size, ".*"))) %>%
  mutate(sample = str_remove(sample, "^NQ_")) %>%
  mutate(sample = str_replace_all(sample, "-", "")) %>%
  filter(! grepl("DMSO|EED|GSK", sample)) %>%
  separate(sample, c("condition", "timepoint", "replicate"), remove = F, sep = "_") %>%
  mutate(condition = factor(condition, levels = c("PT", "CTCFEL", "CTCFNQ", 
                                                  "CTCF", "WAPL", "CTCFWAPL",
                                                  "RAD21")),
         timepoint = factor(timepoint, levels = c("0h", "6h", "24h", "96h"))) %>%
  arrange(condition, timepoint) %>%
  add_column(cell = factor("mESC")) %>%
  mutate(sample = factor(sample, levels = unique(sample))) %>%
  filter(! (condition == "CTCFNQ" & replicate == "r4"),
         ! (condition == "RAD21" & replicate == "r3"),
         ! (condition == "PT" & replicate %in% c("r4", "r5", "r6")),
         ! (condition == "CTCFWAPL" & replicate == "r2" & timepoint == "24h"),
         ! (condition == "CTCFWAPL" & replicate == "r8"))

# Load DamID
damid.without.scaling <- LoadDamID(metadata.scaling, damid.dir)
 
# Scaling 
PlotDensity(damid.without.scaling, "Density before scaling", xlimits = c(-4, 3), 
            replicate = T)

damid_replicates <- damid.scaling <- ScaleDamID(damid.without.scaling)
PlotDensity(damid.scaling, "Density after scaling", xlimits = c(-3, 2.5), 
            replicate = T)

This nicely illustrates that data scaling is required. The dynamic range simply varies between replicates.

Update ts220203: can we translate the z-scaled values to log2-ratios? This translation is different for every sample. But, we can show all these correlations. Let’s do that here. The goal is to determine the range of ratios that a difference of 1 in z-space corresponds to.

# The easiest way to do this is to determine the standard deviation of
# every sample.
df_unscaled <- as_tibble(mcols(damid.without.scaling)) %>%
  summarise_all(function(x) sd(x, na.rm = T)) %>%
  t()

df_scaled <- as_tibble(mcols(damid.scaling)) %>%
  summarise_all(function(x) sd(x, na.rm = T)) %>%
  t()

# Combine these
tib_conversion <- tibble(samples = row.names(df_unscaled),
                         unscaled = df_unscaled[, 1],
                         scaled = df_scaled[, 1]) %>%
  mutate(ratio = unscaled / scaled)

# And plot
tib_conversion %>%
  ggplot(aes(x = 1, y = ratio)) +
  geom_quasirandom(col = "darkgrey") +
  geom_boxplot(fill = NA, col = "black", outlier.shape = NA) +
  geom_hline(yintercept = c(1), col = "black", linetype = "dashed") +
  scale_x_continuous(breaks = NULL) +
  coord_cartesian(ylim = c(0.5, 1.5)) +
  ylab("Conversion z-score to log2-ratio") +
  theme_bw() +
  theme(aspect.ratio = 3,
        axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank())

# Determine the mean and confidence interval
tib_conversion
## # A tibble: 48 × 4
##    samples       unscaled scaled ratio
##    <chr>            <dbl>  <dbl> <dbl>
##  1 PT_0h_r8         1.15       1 1.15 
##  2 PT_0h_r9         1.12       1 1.12 
##  3 PT_24h_r8        1.16       1 1.16 
##  4 PT_24h_r9        1.15       1 1.15 
##  5 CTCFEL_0h_r1     1.18       1 1.18 
##  6 CTCFEL_0h_r4     0.867      1 0.867
##  7 CTCFEL_6h_r1     1.14       1 1.14 
##  8 CTCFEL_6h_r4     1.03       1 1.03 
##  9 CTCFEL_24h_r1    1.31       1 1.31 
## 10 CTCFEL_24h_r4    0.802      1 0.802
## # … with 38 more rows
mean(tib_conversion$ratio)
## [1] 1.063428
quantile(tib_conversion$ratio, c(0.025, 0.975))
##      2.5%     97.5% 
## 0.7775997 1.4469210

Let’s try to summarise all the data in a PCA plot.

# Calculate PCA
tib <- as_tibble(mcols(damid.scaling)) %>%
  dplyr::select(-contains("NQ")) %>%
  drop_na()

pca <- prcomp(t(tib))

# Gather information from PCA
tib_pca <- as_tibble(pca$x) %>%
  dplyr::select(1:5) %>%
  add_column(sample = names(tib)) %>%
  separate(sample, c("condition", "timepoint", "replicate"), remove = F) %>%
  mutate(condition = factor(condition, levels = levels(metadata.damid$condition)),
         timepoint = factor(timepoint, levels = levels(metadata.damid$timepoint)))

var_explained <- pca$sdev^2/sum(pca$sdev^2)

# Plot
tib_pca %>%
  ggplot(aes(x = PC1, y = PC2, col = timepoint, shape = condition)) +
  geom_point(size = 3) +
  xlab(paste0("PC1 (", 
              round(var_explained[1]*100, 2),
              "%)")) +
  ylab(paste0("PC2 (", 
              round(var_explained[2]*100, 2),
              "%)")) +
  theme_bw() +
  theme(aspect.ratio = 1)

# umap plot
tib_umap <- as_tibble(umap(tib)$data) %>%
  add_column(sample = names(tib)) %>%
  separate(sample, c("condition", "timepoint", "replicate"), remove = F) %>%
  mutate(condition = factor(condition, levels = levels(metadata.damid$condition)),
         timepoint = factor(timepoint, levels = levels(metadata.damid$timepoint)))

#colors <- c("grey30", "red", "blue", "darkgreen", "purple")
tib_umap %>%
  ggplot(aes(x = X1, y = X2, col = timepoint, shape = condition)) +
  geom_point(size = 3) +
  xlab("UMAP 1") +
  ylab("UMAP 2") +
  #scale_color_manual(values = colors, name = "Cell line") +
  theme_bw() +
  theme(aspect.ratio = 1)

I like the UMAP best. It shows the main conclusions: most changes occur for the WAPL and CTCF-WAPL cell lines.

Also, make some plots to show that replicate experiments correlate.

# Plot correlations
CompareReplicates <- function(tib, condition, r1, r2, limits = c(-3.5, 3)) {
  
  # Select data
  tib_filtered <- tib %>%
    dplyr::select(contains(condition)) %>%
    dplyr::select(contains(r1),
                  contains(r2)) %>%
    rename_all(function(x) str_replace(x, paste0(condition, "_"), "")) %>%
    rename_all(function(x) str_replace(x, paste0(r1), "r1")) %>%
    rename_all(function(x) str_replace(x, paste0(r2), "r2")) %>%
    drop_na()
  
  # Base plot
  plt_base <- tib_filtered %>%
    ggplot() +
    geom_vline(xintercept = 0, col = "blue", linetype = "dashed") +
    geom_hline(yintercept = 0, col = "blue", linetype = "dashed") +
    geom_abline(slope = 1, intercept = 0, col = "red") +
    coord_cartesian(xlim = limits, ylim = limits) +
    scale_fill_gradient(low = "lightgrey", high = "black", name = "Count") +
    theme_bw() +
    theme(aspect.ratio = 1)
  
  # Plots for all time points
  plt_0h <- plt_base +
    geom_bin2d(aes(x = `0h_r1`, y = `0h_r2`), bins = 100) +
    ggtitle(paste0(condition, " - Rsq=", round(cor(tib_filtered$`0h_r1`,
                                                   tib_filtered$`0h_r2`),
                                               2))) +
    xlab("t=0h r1") +
    ylab("t=0h r2")
  plot(plt_0h)
  
  if (grepl("PT", condition)) return()
  
  plt_6h <- plt_base +
    geom_bin2d(aes(x = `6h_r1`, y = `6h_r2`), bins = 100) +
    ggtitle(paste0(condition, " - Rsq=", round(cor(tib_filtered$`6h_r1`,
                                                   tib_filtered$`6h_r2`),
                                               2))) +
    xlab("t=6h r1") +
    ylab("t=6h r2")
  plot(plt_6h)
  
  plt_24h <- plt_base +
    geom_bin2d(aes(x = `24h_r1`, y = `24h_r2`), bins = 100) +
    ggtitle(paste0(condition, " - Rsq=", round(cor(tib_filtered$`24h_r1`,
                                                   tib_filtered$`24h_r2`),
                                               2))) +
    xlab("t=24h r1") +
    ylab("t=24h r2")
  plot(plt_24h)
  
  # Plot the differences
  tib_filtered <- tib_filtered %>%
    mutate(diff_6h_r1 = `6h_r1` - `0h_r1`,
           diff_24h_r1 = `24h_r1` - `0h_r1`,
           diff_6h_r2 = `6h_r2` - `0h_r2`,
           diff_24h_r2 = `24h_r2` - `0h_r2`)
  
  plt_base <- tib_filtered %>%
    ggplot() +
    geom_vline(xintercept = 0, col = "blue", linetype = "dashed") +
    geom_hline(yintercept = 0, col = "blue", linetype = "dashed") +
    coord_cartesian(xlim = c(-2.5, 2.5), ylim = c(-2.5, 2.5)) +
    scale_fill_gradient(low = "lightgrey", high = "black", name = "Count") +
    theme_bw() +
    theme(aspect.ratio = 1)
  
  plt_6h <- plt_base +
    geom_bin2d(aes(x = diff_6h_r1, y = diff_6h_r2), bins = 100) +
    ggtitle(paste0(condition, " - Rsq=", round(cor(tib_filtered$diff_6h_r1,
                                                   tib_filtered$diff_6h_r2),
                                               2))) +
    xlab("diff t=6h r1") +
    ylab("diff t=6h r2")
  plot(plt_6h)
  
  plt_24h <- plt_base +
    geom_bin2d(aes(x = diff_24h_r1, y = diff_24h_r2), bins = 100) +
    ggtitle(paste0(condition, " - Rsq=", round(cor(tib_filtered$diff_24h_r1,
                                                   tib_filtered$diff_24h_r2),
                                               2))) +
    xlab("diff t=24h r1") +
    ylab("diff t=24h r2")
  plot(plt_24h)
  
}
CompareReplicatesLADs <- function(tib, condition, r1, r2) {
  
  # Select data
  tib_filtered <- tib %>%
    dplyr::select(contains(condition)) %>%
    dplyr::select(contains(r1),
                  contains(r2)) %>%
    rename_all(function(x) str_replace(x, paste0(condition, "_"), "")) %>%
    rename_all(function(x) str_replace(x, paste0(r1), "r1")) %>%
    rename_all(function(x) str_replace(x, paste0(r2), "r2")) %>%
    drop_na()
  
  # Base plot
  plt_base <- tib_filtered %>%
    ggplot() +
    geom_vline(xintercept = 0, col = "blue", linetype = "dashed") +
    geom_hline(yintercept = 0, col = "blue", linetype = "dashed") +
    coord_cartesian(xlim = c(-1, 1.5), ylim = c(-1, 1.5)) +
    scale_fill_gradient(low = "lightgrey", high = "black", name = "Count") +
    theme_bw() +
    theme(aspect.ratio = 1)
  
  # Plots for all time points
  plt_0h <- plt_base +
    geom_point(aes(x = `0h_r1`, y = `0h_r2`), 
               alpha = 0.3, size = 1) +
    ggtitle(paste0(condition, " - Rsq=", round(cor(tib_filtered$`0h_r1`,
                                                   tib_filtered$`0h_r2`),
                                               2))) +
    xlab("t=0h r1") +
    ylab("t=0h r2")
  plot(plt_0h)
  
  if (grepl("PT", condition)) return()
  
  plt_6h <- plt_base +
    geom_point(aes(x = `6h_r1`, y = `6h_r2`), 
               alpha = 0.3, size = 1) +
    ggtitle(paste0(condition, " - Rsq=", round(cor(tib_filtered$`6h_r1`,
                                                   tib_filtered$`6h_r2`),
                                               2))) +
    xlab("t=6h r1") +
    ylab("t=6h r2")
  plot(plt_6h)
  
  plt_24h <- plt_base +
    geom_point(aes(x = `24h_r1`, y = `24h_r2`), 
               alpha = 0.3, size = 1) +
    ggtitle(paste0(condition, " - Rsq=", round(cor(tib_filtered$`24h_r1`,
                                                   tib_filtered$`24h_r2`),
                                               2))) +
    xlab("t=24h r1") +
    ylab("t=24h r2")
  plot(plt_24h)
  
  # Plot the differences
  tib_filtered <- tib_filtered %>%
    mutate(diff_6h_r1 = `6h_r1` - `0h_r1`,
           diff_24h_r1 = `24h_r1` - `0h_r1`,
           diff_6h_r2 = `6h_r2` - `0h_r2`,
           diff_24h_r2 = `24h_r2` - `0h_r2`)
  
  plt_base <- tib_filtered %>%
    ggplot() +
    geom_vline(xintercept = 0, col = "blue", linetype = "dashed") +
    geom_hline(yintercept = 0, col = "blue", linetype = "dashed") +
    coord_cartesian(xlim = c(-1, 1), ylim = c(-1, 1)) +
    scale_fill_gradient(low = "lightgrey", high = "black", name = "Count") +
    theme_bw() +
    theme(aspect.ratio = 1)
  
  plt_6h <- plt_base +
    geom_point(aes(x = diff_6h_r1, y = diff_6h_r2), 
               alpha = 0.3, size = 1) +
    ggtitle(paste0(condition, " - Rsq=", round(cor(tib_filtered$diff_6h_r1,
                                                   tib_filtered$diff_6h_r2),
                                               2))) +
    xlab("diff t=6h r1") +
    ylab("diff t=6h r2")
  plot(plt_6h)
  
  plt_24h <- plt_base +
    geom_point(aes(x = diff_24h_r1, y = diff_24h_r2), 
               alpha = 0.3, size = 1) +
    ggtitle(paste0(condition, " - Rsq=", round(cor(tib_filtered$diff_24h_r1,
                                                   tib_filtered$diff_24h_r2),
                                               2))) +
    xlab("diff t=24h r1") +
    ylab("diff t=24h r2")
  plot(plt_24h)
  
}

# Compare replicates bewteen bins
tib <- as_tibble(mcols(damid.scaling))

CompareReplicates(tib, "PT", "r8", "r9")

## NULL
CompareReplicates(tib, "CTCFEL", "r1", "r4")

CompareReplicates(tib, "RAD21", "r4", "r5")

CompareReplicates(tib, "WAPL", "r1", "r2")

CompareReplicates(tib, "CTCFWAPL", "r6", "r7")

# I also want to prepare a figure that shows all the correlation values
# Do this without data scaling!
tib <- as_tibble(mcols(damid.without.scaling))

CompareReplicates(tib, "PT", "r8", "r9", limits = c(-3.5, 2.5))

## NULL
tib_cor <- tib %>%
  mutate(bin = 1:nrow(.)) %>%
  drop_na() %>%
  gather(key, value, -bin) %>%
  mutate(idx = match(key, metadata.scaling$sample),
         condition = metadata.scaling$condition[idx],
         timepoint = metadata.scaling$timepoint[idx],
         replicate = metadata.scaling$replicate[idx])
#separate(key, c("condition", "timepoint", "replicate"), remove = F) %>%

tib_cor <- tib_cor %>%
  group_by(condition, timepoint) %>%
  nest() %>%
  mutate(data2 = map(data, 
                     function(df) select(df, -replicate, -idx)),
         data2 = map(data2, 
                     spread, key = key, value = value),
         data2 = map(data2, 
                     function(df) select(df, -bin)),
         pearson = map(data2, cor),
         pearson = map(pearson, 
                       function(df) gather(as_tibble(df),
                                           key = key, value = value))) %>%
  unnest(pearson) %>%
  dplyr::select(-contains("data")) %>%
  ungroup()

tib_cor <- tib_cor %>%
  group_by(condition, timepoint) %>%
  mutate(target = rep(unique(key), 
                      length(unique(key)))) %>%
  ungroup() %>%
  filter(key != target,
         key > target)

# Plot
tib_cor %>%
  ggplot(aes(x = 1, y = value)) +
  geom_quasirandom(col = "darkgrey") +
  geom_boxplot(fill = NA, col = "black", outlier.shape = NA) +
  geom_hline(yintercept = c(0, 1), col = "black", linetype = "dashed") +
  coord_cartesian(ylim = c(0, 1)) +
  scale_x_continuous(breaks = NULL) +
  ylab("Pearson") +
  theme_bw() +
  theme(aspect.ratio = 3,
        axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank())

tib_cor %>%
  ggplot(aes(x = 1, y = value)) +
  geom_quasirandom(aes(col = condition == "PT")) +
  geom_boxplot(fill = NA, col = "black", outlier.shape = NA) +
  geom_hline(yintercept = c(0, 1), col = "black", linetype = "dashed") +
  coord_cartesian(ylim = c(0, 1)) +
  scale_x_continuous(breaks = NULL) +
  scale_color_manual(values = c("darkgrey", "red")) +
  ylab("Pearson") +
  theme_bw() +
  theme(aspect.ratio = 3,
        axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank())

# LAD analyses
tib <- as_tibble(mcols(damid.scaling))
ovl <- as_tibble(findOverlaps(damid.scaling,
                              LADs[[1]]))

tib_inLADs <- tib %>%
  mutate(queryHits = 1:nrow(.)) %>%
  full_join(ovl) %>%
  drop_na(subjectHits) %>%
  group_by(subjectHits) %>%
  dplyr::summarise_all(function(x) mean(x, na.rm = T))

CompareReplicatesLADs(tib_inLADs, "PT", "r8", "r9")

## NULL
CompareReplicatesLADs(tib_inLADs, "CTCFEL", "r1", "r4")

CompareReplicatesLADs(tib_inLADs, "RAD21", "r4", "r5")

CompareReplicatesLADs(tib_inLADs, "WAPL", "r1", "r2")

CompareReplicatesLADs(tib_inLADs, "CTCFWAPL", "r6", "r7")

# Finally, all LAD correlations
tib_cor <- tib_inLADs %>%
  mutate(bin = 1:nrow(.)) %>%
  drop_na() %>%
  gather(key, value, -bin) %>%
  mutate(idx = match(key, metadata.scaling$sample),
         condition = metadata.scaling$condition[idx],
         timepoint = metadata.scaling$timepoint[idx],
         replicate = metadata.scaling$replicate[idx])
#separate(key, c("condition", "timepoint", "replicate"), remove = F) %>%

tib_cor <- tib_cor %>%
  group_by(condition, timepoint) %>%
  nest() %>%
  mutate(data2 = map(data, 
                     function(df) select(df, -replicate, -idx)),
         data2 = map(data2, 
                     spread, key = key, value = value),
         data2 = map(data2, 
                     function(df) select(df, -bin)),
         pearson = map(data2, cor),
         pearson = map(pearson, 
                       function(df) gather(as_tibble(df),
                                           key = key, value = value))) %>%
  unnest(pearson) %>%
  dplyr::select(-contains("data")) %>%
  ungroup()

tib_cor <- tib_cor %>%
  group_by(condition, timepoint) %>%
  mutate(target = rep(unique(key), 
                      length(unique(key)))) %>%
  ungroup() %>%
  filter(key != target,
         key > target) %>% 
  drop_na()

tib_cor %>% 
  print(n = 40)
## # A tibble: 39 × 5
##    condition timepoint key             value target         
##    <fct>     <fct>     <chr>           <dbl> <chr>          
##  1 PT        0h        PT_0h_r9        0.986 PT_0h_r8       
##  2 PT        24h       PT_24h_r9       0.987 PT_24h_r8      
##  3 CTCFEL    0h        CTCFEL_0h_r4    0.842 CTCFEL_0h_r1   
##  4 CTCFEL    6h        CTCFEL_6h_r4    0.894 CTCFEL_6h_r1   
##  5 CTCFEL    24h       CTCFEL_24h_r4   0.903 CTCFEL_24h_r1  
##  6 CTCFEL    96h       CTCFEL_96h_r4   0.984 CTCFEL_96h_r1  
##  7 WAPL      0h        WAPL_0h_r2      0.972 WAPL_0h_r1     
##  8 WAPL      0h        WAPL_0h_r5      0.938 WAPL_0h_r1     
##  9 WAPL      0h        WAPL_0h_r5      0.907 WAPL_0h_r2     
## 10 WAPL      6h        WAPL_6h_r2      0.981 WAPL_6h_r1     
## 11 WAPL      6h        WAPL_6h_r5      0.956 WAPL_6h_r1     
## 12 WAPL      6h        WAPL_6h_r5      0.934 WAPL_6h_r2     
## 13 WAPL      24h       WAPL_24h_r2     0.977 WAPL_24h_r1    
## 14 WAPL      24h       WAPL_24h_r5     0.953 WAPL_24h_r1    
## 15 WAPL      24h       WAPL_24h_r5     0.930 WAPL_24h_r2    
## 16 WAPL      96h       WAPL_96h_r2     0.988 WAPL_96h_r1    
## 17 WAPL      96h       WAPL_96h_r5     0.981 WAPL_96h_r1    
## 18 WAPL      96h       WAPL_96h_r5     0.983 WAPL_96h_r2    
## 19 CTCFWAPL  0h        CTCFWAPL_0h_r2  0.896 CTCFWAPL_0h_r1 
## 20 CTCFWAPL  0h        CTCFWAPL_0h_r6  0.927 CTCFWAPL_0h_r1 
## 21 CTCFWAPL  0h        CTCFWAPL_0h_r6  0.961 CTCFWAPL_0h_r2 
## 22 CTCFWAPL  0h        CTCFWAPL_0h_r7  0.918 CTCFWAPL_0h_r1 
## 23 CTCFWAPL  0h        CTCFWAPL_0h_r7  0.928 CTCFWAPL_0h_r2 
## 24 CTCFWAPL  0h        CTCFWAPL_0h_r7  0.937 CTCFWAPL_0h_r6 
## 25 CTCFWAPL  6h        CTCFWAPL_6h_r2  0.964 CTCFWAPL_6h_r1 
## 26 CTCFWAPL  6h        CTCFWAPL_6h_r6  0.976 CTCFWAPL_6h_r1 
## 27 CTCFWAPL  6h        CTCFWAPL_6h_r6  0.967 CTCFWAPL_6h_r2 
## 28 CTCFWAPL  6h        CTCFWAPL_6h_r7  0.969 CTCFWAPL_6h_r1 
## 29 CTCFWAPL  6h        CTCFWAPL_6h_r7  0.951 CTCFWAPL_6h_r2 
## 30 CTCFWAPL  6h        CTCFWAPL_6h_r7  0.958 CTCFWAPL_6h_r6 
## 31 CTCFWAPL  24h       CTCFWAPL_24h_r6 0.975 CTCFWAPL_24h_r1
## 32 CTCFWAPL  24h       CTCFWAPL_24h_r7 0.981 CTCFWAPL_24h_r1
## 33 CTCFWAPL  24h       CTCFWAPL_24h_r7 0.973 CTCFWAPL_24h_r6
## 34 CTCFWAPL  96h       CTCFWAPL_96h_r2 0.958 CTCFWAPL_96h_r1
## 35 CTCFWAPL  96h       CTCFWAPL_96h_r6 0.943 CTCFWAPL_96h_r1
## 36 CTCFWAPL  96h       CTCFWAPL_96h_r6 0.963 CTCFWAPL_96h_r2
## 37 RAD21     0h        RAD21_0h_r5     0.966 RAD21_0h_r4    
## 38 RAD21     6h        RAD21_6h_r5     0.978 RAD21_6h_r4    
## 39 RAD21     24h       RAD21_24h_r5    0.980 RAD21_24h_r4
# Plot
tib_cor %>%
  ggplot(aes(x = 1, y = value)) +
  geom_quasirandom(col = "darkgrey") +
  geom_boxplot(fill = NA, col = "black", outlier.shape = NA) +
  geom_hline(yintercept = c(0, 1), col = "black", linetype = "dashed") +
  coord_cartesian(ylim = c(0, 1)) +
  scale_x_continuous(breaks = NULL) +
  ylab("Pearson") +
  theme_bw() +
  theme(aspect.ratio = 3,
        axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank())

tib_cor %>%
  ggplot(aes(x = 1, y = value)) +
  geom_quasirandom(aes(col = condition == "PT")) +
  geom_boxplot(fill = NA, col = "black", outlier.shape = NA) +
  geom_hline(yintercept = c(0, 1), col = "black", linetype = "dashed") +
  coord_cartesian(ylim = c(0, 1)) +
  scale_x_continuous(breaks = NULL) +
  scale_color_manual(values = c("darkgrey", "red")) +
  ylab("Pearson") +
  theme_bw() +
  theme(aspect.ratio = 3,
        axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank()) 

The most important figure here is the Pearson correlation distribution between all replicate experiments. For the rest, the figures show that replicates correlate (linearly), but also that the differences over time are only somewhat correlating. Thus, the effect size is small and per-bin analyses are not going to work.

Before I can continue with the DamID data, I want to convert the values first to z-scores. Also, make density plots.

# Scale DamID, plot density before and after
PlotDensity(damid, "Density before scaling", xlimits = c(-4, 3))

damid <- ScaleDamID(damid)
PlotDensity(damid, "Density after scaling", xlimits = c(-3, 2.5))

PlotDensity(damid, "Density after scaling", xlimits = c(-3, 2.5),
            conditions = c("CTCFEL", "WAPL", "CTCFWAPL", "RAD21"))

# Also scale counts - this is a bit weird at z-scale, but hey 
# damid.lmnb1 <- ScaleDamID(damid.lmnb1)
# damid.dam <- ScaleDamID(damid.dam)

Finally, prepare PCA plots for the combined samples.

# Calculate PCA
tib <- as_tibble(mcols(damid)) %>%
  drop_na() %>%
  dplyr::select(-contains("CTCF_"))

pca <- prcomp(t(tib))

# Gather information from PCA
tib_pca <- as_tibble(pca$x) %>%
  dplyr::select(1:5) %>%
  add_column(sample = names(tib)) %>%
  separate(sample, c("condition", "timepoint"), remove = F) %>%
  mutate(condition = factor(condition, levels = levels(metadata.damid$condition)),
         timepoint = factor(timepoint, levels = levels(metadata.damid$timepoint)))

var_explained <- pca$sdev^2/sum(pca$sdev^2)

# Plot
tib_pca %>%
  ggplot(aes(x = PC1, y = PC2, col = timepoint, shape = condition)) +
  geom_point() +
  xlab(paste0("PC1 (", 
              round(var_explained[1]*100, 2),
              "%)")) +
  ylab(paste0("PC2 (", 
              round(var_explained[2]*100, 2),
              "%)")) +
  theme_bw() +
  theme(aspect.ratio = 1)

Good.

2. DamID scores at LAD borders

I need to add the distance to LAD border (and which LAD border) to the DamID data. First, filter LAD borders for positioning near active genes.

Note that most of these analyses are old and removed. Instead, see below for better analyses of LAD borders, where I process all borders individually.

# Only for borders without genes
borders <- borders[borders$ovl_gene == F] 

Having these distances, I can now start plotting the DamID profile around LAD borders. Also, I can plot the difference between the later timepoints and 0h.

3. Individual LAD borders

The previous plots are all based on average effects. Here, I will try to work with individual LAD borders, to see if the real differences are only for a subset of CTCF borders.

First, I will determine the distance to LAD borders for all genomic bins. Use LAD borders defined in the pA-DamID data for the parental clone.

GatherBorders <- function(damid, borders, lads) {
  
  # Get the distances to the nearest LAD borders for all damid bins
  damid.mid <- damid
  start(damid.mid) <- end(damid.mid) <- (start(damid.mid) + end(damid.mid)) / 2
  
  dis <- as_tibble(distanceToNearest(damid.mid, borders))
  
  # Round distances to the nearest 5kb
  dis <- dis %>%
    mutate(distance = ceiling(distance / 5000) * 5000)
  
  # Also, determine which bins overlap with lads
  ovl <- damid.mid %over% lads
  
  # Process data as tibble
  tib <- as_tibble(damid.mid) %>%
    add_column(border_idx = dis$subjectHits,
               border_ctcf = borders$CTCF[dis$subjectHits],
               border_ctcf_n = borders$CTCF.count[dis$subjectHits],
               border_ctcf_strand = borders$CTCF_strand[dis$subjectHits],
               distance = dis$distance,
               within_lad = ovl) %>%
    mutate(distance = case_when(within_lad == 1 ~ distance,
                                T ~ -distance),
           border_ctcf_strand = factor(border_ctcf_strand,
                                       levels = c("outwards", "inwards",
                                                  "ambiguous", "nonCTCF")),
           border_ctcf_n = case_when(border_ctcf_n >= 3 ~ ">=3", 
                                     border_ctcf_n == 2 ~ "2",
                                     border_ctcf_n == 1 ~ "1",
                                     T ~ "nonCTCF"),
           border_ctcf_n = factor(border_ctcf_n,
                                  levels = c("nonCTCF", "1", "2", ">=3"))) %>%
    filter(abs(distance) < 2e5)
  
  # # Only work with "complete" borders (remove small iLADs / LADs)
  # borders_complete <- which(as.numeric(table(tib$border_idx)) > 25)
  # 
  # tib <- tib %>%
  #   filter(border_idx %in% borders_complete)
  
  # Plot all lines as "test"
  tib %>%
    ggplot(aes(x = distance / 1e3, y = PT_0h)) +
    #geom_line(aes(group = border_idx), alpha = 0.1) +
    geom_smooth(aes(group = border_ctcf, col = border_ctcf), se = T) +
    xlab("Distance to LAD border (kb)") +
    ylab("pA-DamID (z-score)") +
    theme_bw() +
    theme(aspect.ratio = 1)
  
  tib
  
}

tib_damid <- GatherBorders(damid, borders = borders, 
                           lads = LADs[["mESC_pA_PT"]])
tib_damid_lmnb1 <- GatherBorders(damid.lmnb1, borders = borders, 
                                 lads = LADs[["mESC_pA_PT"]])
tib_damid_dam <- GatherBorders(damid.dam, borders = borders, 
                               lads = LADs[["mESC_pA_PT"]])

Next, plot the effect of depletion at LAD borders. I will prepare several figures / results here:

  • Average profile (+ conf. int.) of all borders.
  • Per-border effect outside of LAD borders.
  • Statistics of this.

Not only do this for depletions at +/- CTCF borders, but also based on CTCF strand and CTCF count. Also, for the unnormalized data (as cpm) to illustrate whether the effects are caused by differences in LaminB1 or Dam signal.

sd_fun <- function(x, y = 1.3) {
  return(data.frame(y = y, label = round(sd(x), 2)))
}

PlotBordersWithConfidenceIntervals <- function(tib, samples,
                                               ylim = c(-0.9, 0.65),
                                               smooth = 1,
                                               group = "border_ctcf",
                                               keep_PT = F,
                                               filter_96h = T) {
  
  if (filter_96h) {
    samples <- samples[! grepl("96h", samples)]
  }
  
  # Gather tib
  if (smooth != 1) {
    tib <- tib %>%
      mutate_at(vars(ends_with("h")), runmean, k = smooth, endrule = "mean")
  }
  
  tib_gather <- tib %>%
    gather(key, value, 
           grep("[0-9]+h$", names(tib), value = T)) %>%
    mutate(key = factor(key, levels = levels(metadata.damid$sample)))
  tib_gather$group <- tib_gather %>% pull(group)
  
  if (keep_PT) {
    plt <- tib_gather %>%
      filter(key %in% samples) %>%
      ggplot(aes(x = distance / 1e3, y = value, 
                 group = key, col = key, fill = key)) +
      annotate("rect", xmin = 0, xmax = 300, ymin = -10, ymax = 10, 
               fill = "grey", alpha = 0.3) +  
      geom_hline(yintercept = 0, col = "black", linetype = "dashed") +
      geom_vline(xintercept = 0, col = "black", linetype = "dashed") +
      #geom_line(aes(group = border_idx), alpha = 0.1) +
      #geom_smooth(aes(group = key, col = key), se = T) +
      stat_summary(fun = mean, geom = "line", size = 1) +
      stat_summary(fun.data = mean_se, geom = "ribbon", alpha = 0.25, col = NA,
                   fun.args = list(mult = 1.96)) +
      #facet_grid(. ~ border_ctcf) +
      facet_grid(group ~ .) +
      xlab("Distance to LAD border (kb)") +
      ylab("pA-DamID (z-score)") +
      coord_cartesian(xlim = c(-200, 200), ylim = ylim, expand = F) +
      scale_color_brewer(palette = "Set1", name = "Border class") +
      scale_fill_brewer(palette = "Set1", name = "Border class", guide = "none") +
      theme_bw() +
      theme(aspect.ratio = 1,
            axis.text.x = element_text(angle = 90, hjust = 1))
    plot(plt)
    
    samples_without_pt <- samples
    
    if (length(samples) == 1) return(NULL)
    
  } else {
    
    # Plot some tracks
    plt <- tib_gather %>%
      filter(key %in% samples & key != "PT_0h") %>%
      ggplot(aes(x = distance / 1e3, y = value, 
                 group = key, col = key, fill = key)) +
      annotate("rect", xmin = 0, xmax = 300, ymin = -10, ymax = 10, 
               fill = "grey", alpha = 0.3) +  
      geom_hline(yintercept = 0, col = "black", linetype = "dashed") +
      geom_vline(xintercept = 0, col = "black", linetype = "dashed") +
      #geom_line(aes(group = border_idx), alpha = 0.1) +
      #geom_smooth(aes(group = key, col = key), se = T) +
      stat_summary(fun = mean, geom = "line", size = 1) +
      stat_summary(fun.data = mean_se, geom = "ribbon", alpha = 0.25, col = NA,
                   fun.args = list(mult = 1.96)) +
      #facet_grid(. ~ border_ctcf) +
      facet_grid(group ~ .) +
      xlab("Distance to LAD border (kb)") +
      ylab("pA-DamID (z-score)") +
      coord_cartesian(xlim = c(-200, 200), ylim = ylim, expand = F) +
      scale_color_brewer(palette = "Set1", name = "Border class") +
      scale_fill_brewer(palette = "Set1", name = "Border class", guide = "none") +
      theme_bw() +
      theme(aspect.ratio = 1,
            axis.text.x = element_text(angle = 90, hjust = 1))
    plot(plt)
    
    samples_without_pt <- samples[2:length(samples)]
    
  }
  
  # # Can I also quantify the difference within the LAD?
  # tib_ctcf <- tib %>%
  #   filter(distance > 20e3 & distance < 120e3) %>%
  #   dplyr::select(border_idx, all_of(samples_without_pt), all_of(group)) %>%
  #   dplyr::rename_at(vars(group), ~ "group") %>%
  #   group_by(border_idx, group) %>%
  #   summarise_at(samples_without_pt, mean, na.rm = T) %>%
  #   ungroup() %>%
  #   dplyr::rename_at(samples_without_pt[1], ~c("base")) %>%
  #   mutate_at(samples_without_pt[2:length(samples_without_pt)], 
  #             function(x) x - .$base) %>%
  #   dplyr::select(border_idx, group, 
  #                 samples_without_pt[2:length(samples_without_pt)]) %>%
  #   gather(key, value, -group, -border_idx) %>%
  #   mutate(key = factor(key, samples_without_pt),
  #          group = factor(group, levels = c("nonCTCF",
  #                                           "CTCF",
  #                                           "outwards", "inwards", "ambiguous",
  #                                           "1", "2", ">=3")))
  # 
  # # Plot by time point
  # plt <- tib_ctcf %>%
  #   ggplot(aes(x = group, y = value, col = group)) +
  #   geom_quasirandom() +
  #   geom_boxplot(fill = NA, col = "black", outlier.shape = NA) +
  #   geom_hline(yintercept = 0, col = "darkgrey", linetype = "dashed") +
  #   facet_grid(. ~ key) +
  #   coord_cartesian(ylim = c(-1.5, 1.5)) +
  #   scale_color_brewer(palette = "Set2", guide = "none") +
  #   xlab("") +
  #   ylab("Difference within LAD with t=0h") +
  #   theme_bw() +
  #   theme(aspect.ratio = 1.5,
  #         axis.text.x = element_text(angle = 90, hjust = 1))
  # plot(plt)
  
  
  # Can I also quantify the difference between the local CTCF depletion?
  tib_ctcf <- tib %>%
    filter(distance > -20e3 & distance < 0) %>%
    dplyr::select(border_idx, all_of(samples_without_pt), all_of(group)) %>%
    dplyr::rename_at(vars(group), ~ "group") %>%
    group_by(border_idx, group) %>%
    summarise_at(samples_without_pt, mean, na.rm = T) %>%
    ungroup() %>%
    dplyr::rename_at(samples_without_pt[1], ~c("base")) %>%
    mutate_at(samples_without_pt[2:length(samples_without_pt)], 
              function(x) x - .$base) %>%
    dplyr::select(border_idx, group, 
                  samples_without_pt[2:length(samples_without_pt)]) %>%
    gather(key, value, -group, -border_idx) %>%
    mutate(key = factor(key, samples_without_pt),
           group = factor(group, levels = c("nonCTCF",
                                            "CTCF",
                                            "outwards", "inwards", "ambiguous",
                                            "1", "2", ">=3")))
  
  # Plot by time point
  plt <- tib_ctcf %>%
    ggplot(aes(x = group, y = value, col = group)) +
    geom_quasirandom() +
    geom_boxplot(fill = NA, col = "black", outlier.shape = NA) +
    geom_hline(yintercept = 0, col = "darkgrey", linetype = "dashed") +
    stat_summary(fun.data = sd_fun, geom = "text", col = "black") +
    facet_grid(. ~ key) +
    coord_cartesian(ylim = c(-1.5, 1.5)) +
    scale_color_brewer(palette = "Set2", guide = "none") +
    xlab("") +
    ylab("Difference outside LAD border with t=0h") +
    theme_bw() +
    theme(aspect.ratio = 1.5,
          axis.text.x = element_text(angle = 90, hjust = 1))
  plot(plt)
  
  # # Plot by border class
  # plt <- tib_ctcf %>%
  #   ggplot(aes(x = key, y = value, col = key)) +
  #   geom_quasirandom() +
  #   geom_boxplot(fill = NA, col = "black", outlier.shape = NA) +
  #   geom_hline(yintercept = 0, col = "darkgrey", linetype = "dashed") +
  #   facet_grid(. ~ group) +
  #   scale_color_brewer(palette = "Set1", guide = F) +
  #   xlab("") +
  #   ylab("Difference outside LAD border with t=0h") +
  #   theme_bw() +
  #   theme(aspect.ratio = 1.5,
  #         axis.text.x = element_text(angle = 90, hjust = 1))
  # plot(plt)
  
  # Statistics - difference from diff = 0
  tib_ctcf %>%
    group_by(group, key) %>%
    dplyr::summarise(pvalue = wilcox.test(value)$p.value) %>%
    ungroup() %>%
    mutate(padj = p.adjust(pvalue),
           sign = padj < 0.05) %>%
    print(n = 40)
  
  # Statistics - difference with nonCTCF borders
  tib_stat <- tibble()
  
  for (current_group in levels(tib_ctcf$group)) {
    for (current_key in levels(tib_ctcf$key)) {
      
      if (! current_group %in% tib_ctcf$group) next
      if (current_group %in% "nonCTCF") next
      if (! current_key %in% tib_ctcf$key) next
      
      
      tmp <- wilcox.test(tib_ctcf %>%
                           filter(group == current_group &
                                    key == current_key) %>%
                           pull(value),
                         tib_ctcf %>%
                           filter(group == "nonCTCF" &
                                    key == current_key) %>%
                           pull(value))
      
      tib_stat <- bind_rows(tib_stat,
                            tibble(group = current_group,
                                   key = current_key,
                                   pvalue = tmp$p.value))
    }
  }
  
  tib_stat %>%
    mutate(padj = p.adjust(pvalue),
           sign = padj < 0.05) %>%
    print(n = 40)
  
  tib_stat
  
}

# CTCF vs non-CTCF
PlotBordersWithConfidenceIntervals(tib_damid, 
                                   c("PT_0h", "CTCFEL_0h",
                                     "RAD21_0h", "WAPL_0h",
                                     "CTCFWAPL_0h"),
                                   keep_PT = T)

## # A tibble: 8 × 5
##   group   key           pvalue     padj sign 
##   <fct>   <fct>          <dbl>    <dbl> <lgl>
## 1 nonCTCF CTCFEL_0h   3.83e-11 2.30e-10 TRUE 
## 2 nonCTCF RAD21_0h    8.64e- 1 8.64e- 1 FALSE
## 3 nonCTCF WAPL_0h     1.89e- 3 3.77e- 3 TRUE 
## 4 nonCTCF CTCFWAPL_0h 4.26e-10 1.71e- 9 TRUE 
## 5 CTCF    CTCFEL_0h   4.05e- 4 1.21e- 3 TRUE 
## 6 CTCF    RAD21_0h    2.40e-15 1.68e-14 TRUE 
## 7 CTCF    WAPL_0h     9.73e-29 7.79e-28 TRUE 
## 8 CTCF    CTCFWAPL_0h 1.73e-10 8.65e-10 TRUE 
## # A tibble: 4 × 5
##   group key           pvalue     padj sign 
##   <chr> <chr>          <dbl>    <dbl> <lgl>
## 1 CTCF  CTCFEL_0h   3.16e-12 6.33e-12 TRUE 
## 2 CTCF  RAD21_0h    2.80e- 9 2.80e- 9 TRUE 
## 3 CTCF  WAPL_0h     3.85e-27 1.54e-26 TRUE 
## 4 CTCF  CTCFWAPL_0h 7.91e-19 2.37e-18 TRUE
## # A tibble: 4 × 3
##   group key           pvalue
##   <chr> <chr>          <dbl>
## 1 CTCF  CTCFEL_0h   3.16e-12
## 2 CTCF  RAD21_0h    2.80e- 9
## 3 CTCF  WAPL_0h     3.85e-27
## 4 CTCF  CTCFWAPL_0h 7.91e-19
tib_stat <- tibble()

tmp <- PlotBordersWithConfidenceIntervals(tib_damid, 
                                          c("PT_0h", "PT_24h"),
                                          keep_PT = T, ylim = c(-0.7, 0.7))

## # A tibble: 2 × 5
##   group   key    pvalue  padj sign 
##   <fct>   <fct>   <dbl> <dbl> <lgl>
## 1 nonCTCF PT_24h 0.884  0.884 FALSE
## 2 CTCF    PT_24h 0.0633 0.127 FALSE
## # A tibble: 1 × 5
##   group key    pvalue  padj sign 
##   <chr> <chr>   <dbl> <dbl> <lgl>
## 1 CTCF  PT_24h  0.208 0.208 FALSE
tib_stat <- bind_rows(tib_stat, tmp)

PlotBordersWithConfidenceIntervals(tib_damid, 
                                          c("PT_0h", "PT_24h", "CTCFEL_0h", "CTCFEL_6h", 
                                            "CTCFEL_24h", "CTCFEL_96h"), 
                                          keep_PT = T, ylim = c(-0.7, 0.7))

## # A tibble: 8 × 5
##   group   key          pvalue     padj sign 
##   <fct>   <fct>         <dbl>    <dbl> <lgl>
## 1 nonCTCF PT_24h     8.84e- 1 8.84e- 1 FALSE
## 2 nonCTCF CTCFEL_0h  3.83e-11 3.06e-10 TRUE 
## 3 nonCTCF CTCFEL_6h  1.13e- 7 7.88e- 7 TRUE 
## 4 nonCTCF CTCFEL_24h 2.28e- 2 6.85e- 2 FALSE
## 5 CTCF    PT_24h     6.33e- 2 1.27e- 1 FALSE
## 6 CTCF    CTCFEL_0h  4.05e- 4 2.43e- 3 TRUE 
## 7 CTCF    CTCFEL_6h  6.95e- 4 3.47e- 3 TRUE 
## 8 CTCF    CTCFEL_24h 3.58e- 3 1.43e- 2 TRUE 
## # A tibble: 4 × 5
##   group key          pvalue     padj sign 
##   <chr> <chr>         <dbl>    <dbl> <lgl>
## 1 CTCF  PT_24h     2.08e- 1 6.24e- 1 FALSE
## 2 CTCF  CTCFEL_0h  3.16e-12 1.27e-11 TRUE 
## 3 CTCF  CTCFEL_6h  5.75e- 1 8.07e- 1 FALSE
## 4 CTCF  CTCFEL_24h 4.03e- 1 8.07e- 1 FALSE
## # A tibble: 4 × 3
##   group key          pvalue
##   <chr> <chr>         <dbl>
## 1 CTCF  PT_24h     2.08e- 1
## 2 CTCF  CTCFEL_0h  3.16e-12
## 3 CTCF  CTCFEL_6h  5.75e- 1
## 4 CTCF  CTCFEL_24h 4.03e- 1
tmp <- PlotBordersWithConfidenceIntervals(tib_damid, 
                                          c("PT_0h", "CTCFEL_0h", "CTCFEL_6h", 
                                            "CTCFEL_24h", "CTCFEL_96h"), 
                                          ylim = c(-0.7, 0.7))

## # A tibble: 4 × 5
##   group   key          pvalue     padj sign 
##   <fct>   <fct>         <dbl>    <dbl> <lgl>
## 1 nonCTCF CTCFEL_6h  1.73e- 1 1.73e- 1 FALSE
## 2 nonCTCF CTCFEL_24h 2.50e- 7 4.99e- 7 TRUE 
## 3 CTCF    CTCFEL_6h  5.14e-18 2.06e-17 TRUE 
## 4 CTCF    CTCFEL_24h 4.90e-15 1.47e-14 TRUE 
## # A tibble: 2 × 5
##   group key          pvalue     padj sign 
##   <chr> <chr>         <dbl>    <dbl> <lgl>
## 1 CTCF  CTCFEL_6h  1.55e-15 1.55e-15 TRUE 
## 2 CTCF  CTCFEL_24h 4.58e-21 9.16e-21 TRUE
tib_stat <- bind_rows(tib_stat, tmp)

PlotBordersWithConfidenceIntervals(tib_damid, 
                                   c("PT_0h", "CTCFNQ_0h", "CTCFNQ_6h", 
                                     "CTCFNQ_24h", "CTCFNQ_96h"))

## # A tibble: 4 × 5
##   group   key          pvalue    padj sign 
##   <fct>   <fct>         <dbl>   <dbl> <lgl>
## 1 nonCTCF CTCFNQ_6h  0.0117   0.0234  TRUE 
## 2 nonCTCF CTCFNQ_24h 0.00155  0.00465 TRUE 
## 3 CTCF    CTCFNQ_6h  0.193    0.193   FALSE
## 4 CTCF    CTCFNQ_24h 0.000345 0.00138 TRUE 
## # A tibble: 2 × 5
##   group key        pvalue  padj sign 
##   <chr> <chr>       <dbl> <dbl> <lgl>
## 1 CTCF  CTCFNQ_6h   0.409 0.817 FALSE
## 2 CTCF  CTCFNQ_24h  0.675 0.817 FALSE
## # A tibble: 2 × 3
##   group key        pvalue
##   <chr> <chr>       <dbl>
## 1 CTCF  CTCFNQ_6h   0.409
## 2 CTCF  CTCFNQ_24h  0.675
tmp <- PlotBordersWithConfidenceIntervals(tib_damid, 
                                          c("PT_0h", "WAPL_0h", "WAPL_6h", 
                                            "WAPL_24h", "WAPL_96h"),
                                          ylim = c(-0.95, 0.6))

## # A tibble: 4 × 5
##   group   key        pvalue     padj sign 
##   <fct>   <fct>       <dbl>    <dbl> <lgl>
## 1 nonCTCF WAPL_6h  1.45e- 5 1.45e- 5 TRUE 
## 2 nonCTCF WAPL_24h 1.23e- 7 2.46e- 7 TRUE 
## 3 CTCF    WAPL_6h  3.42e-32 1.37e-31 TRUE 
## 4 CTCF    WAPL_24h 5.02e-30 1.51e-29 TRUE 
## # A tibble: 2 × 5
##   group key        pvalue     padj sign 
##   <chr> <chr>       <dbl>    <dbl> <lgl>
## 1 CTCF  WAPL_6h  8.50e-11 1.70e-10 TRUE 
## 2 CTCF  WAPL_24h 1.79e- 7 1.79e- 7 TRUE
tib_stat <- bind_rows(tib_stat, tmp)

tmp <- PlotBordersWithConfidenceIntervals(tib_damid, 
                                          c("PT_0h", "CTCFWAPL_0h", "CTCFWAPL_6h", 
                                            "CTCFWAPL_24h", "CTCFWAPL_96h"),
                                          ylim = c(-0.95, 0.6))

## # A tibble: 4 × 5
##   group   key               pvalue        padj sign 
##   <fct>   <fct>              <dbl>       <dbl> <lgl>
## 1 nonCTCF CTCFWAPL_6h  0.0847      0.0847      FALSE
## 2 nonCTCF CTCFWAPL_24h 0.0161      0.0322      TRUE 
## 3 CTCF    CTCFWAPL_6h  0.000000228 0.000000683 TRUE 
## 4 CTCF    CTCFWAPL_24h 0.000000128 0.000000514 TRUE 
## # A tibble: 2 × 5
##   group key                pvalue         padj sign 
##   <chr> <chr>               <dbl>        <dbl> <lgl>
## 1 CTCF  CTCFWAPL_6h  0.000000229  0.000000229  TRUE 
## 2 CTCF  CTCFWAPL_24h 0.0000000150 0.0000000300 TRUE
tib_stat <- bind_rows(tib_stat, tmp)

tmp <- PlotBordersWithConfidenceIntervals(tib_damid, 
                                          c("PT_0h", "RAD21_0h", "RAD21_6h", 
                                            "RAD21_24h"),
                                          ylim = c(-0.95, 0.6))

## # A tibble: 4 × 5
##   group   key         pvalue     padj sign 
##   <fct>   <fct>        <dbl>    <dbl> <lgl>
## 1 nonCTCF RAD21_6h  1.29e-12 5.15e-12 TRUE 
## 2 nonCTCF RAD21_24h 1.19e- 2 2.38e- 2 TRUE 
## 3 CTCF    RAD21_6h  3.17e- 3 9.52e- 3 TRUE 
## 4 CTCF    RAD21_24h 5.29e- 1 5.29e- 1 FALSE
## # A tibble: 2 × 5
##   group key        pvalue   padj sign 
##   <chr> <chr>       <dbl>  <dbl> <lgl>
## 1 CTCF  RAD21_6h  0.00787 0.0157 TRUE 
## 2 CTCF  RAD21_24h 0.0373  0.0373 TRUE
tib_stat <- bind_rows(tib_stat, tmp)

tib_stat %>% 
  mutate(padj = p.adjust(pvalue),
         sign = padj < 0.05) %>%
  print(n = 40)
## # A tibble: 9 × 5
##   group key            pvalue     padj sign 
##   <chr> <chr>           <dbl>    <dbl> <lgl>
## 1 CTCF  PT_24h       2.08e- 1 2.08e- 1 FALSE
## 2 CTCF  CTCFEL_6h    1.55e-15 1.24e-14 TRUE 
## 3 CTCF  CTCFEL_24h   4.58e-21 4.12e-20 TRUE 
## 4 CTCF  WAPL_6h      8.50e-11 5.95e-10 TRUE 
## 5 CTCF  WAPL_24h     1.79e- 7 8.96e- 7 TRUE 
## 6 CTCF  CTCFWAPL_6h  2.29e- 7 9.15e- 7 TRUE 
## 7 CTCF  CTCFWAPL_24h 1.50e- 8 9.00e- 8 TRUE 
## 8 CTCF  RAD21_6h     7.87e- 3 2.36e- 2 TRUE 
## 9 CTCF  RAD21_24h    3.73e- 2 7.46e- 2 FALSE
# CTCF + orientation vs non-CTCF
tib_stat <- tibble()

PlotBordersWithConfidenceIntervals(tib_damid, 
                                   c("PT_0h"),
                                   group = "border_ctcf_strand",
                                   ylim = c(-0.80, 0.80),
                                   keep_PT = T)

## NULL
tmp <- PlotBordersWithConfidenceIntervals(tib_damid, 
                                          c("PT_0h", "PT_24h"),
                                          group = "border_ctcf_strand",
                                          ylim = c(-0.80, 0.80),
                                          keep_PT = T)

## # A tibble: 4 × 5
##   group     key    pvalue   padj sign 
##   <fct>     <fct>   <dbl>  <dbl> <lgl>
## 1 nonCTCF   PT_24h 0.884  0.884  FALSE
## 2 outwards  PT_24h 0.407  0.814  FALSE
## 3 inwards   PT_24h 0.189  0.566  FALSE
## 4 ambiguous PT_24h 0.0167 0.0670 FALSE
## # A tibble: 3 × 5
##   group     key    pvalue  padj sign 
##   <chr>     <chr>   <dbl> <dbl> <lgl>
## 1 outwards  PT_24h 0.392  0.603 FALSE
## 2 inwards   PT_24h 0.302  0.603 FALSE
## 3 ambiguous PT_24h 0.0507 0.152 FALSE
tib_stat <- bind_rows(tib_stat, tmp)

tmp <- PlotBordersWithConfidenceIntervals(tib_damid, 
                                          c("PT_0h", "CTCFEL_0h", "CTCFEL_6h", 
                                            "CTCFEL_24h", "CTCFEL_96h"),
                                          group = "border_ctcf_strand",
                                          ylim = c(-0.80, 0.80))

## # A tibble: 8 × 5
##   group     key              pvalue        padj sign 
##   <fct>     <fct>             <dbl>       <dbl> <lgl>
## 1 nonCTCF   CTCFEL_6h  0.173        0.173       FALSE
## 2 nonCTCF   CTCFEL_24h 0.000000250  0.00000125  TRUE 
## 3 outwards  CTCFEL_6h  0.000151     0.000414    TRUE 
## 4 outwards  CTCFEL_24h 0.0000309    0.000124    TRUE 
## 5 inwards   CTCFEL_6h  0.0000000598 0.000000359 TRUE 
## 6 inwards   CTCFEL_24h 0.000138     0.000414    TRUE 
## 7 ambiguous CTCFEL_6h  0.0000000330 0.000000231 TRUE 
## 8 ambiguous CTCFEL_24h 0.0000000200 0.000000160 TRUE 
## # A tibble: 6 × 5
##   group     key          pvalue     padj sign 
##   <chr>     <chr>         <dbl>    <dbl> <lgl>
## 1 outwards  CTCFEL_6h  1.91e- 5 1.91e- 5 TRUE 
## 2 outwards  CTCFEL_24h 6.50e- 9 2.15e- 8 TRUE 
## 3 inwards   CTCFEL_6h  6.45e- 9 2.15e- 8 TRUE 
## 4 inwards   CTCFEL_24h 5.38e- 9 2.15e- 8 TRUE 
## 5 ambiguous CTCFEL_6h  2.00e- 9 9.98e- 9 TRUE 
## 6 ambiguous CTCFEL_24h 2.27e-14 1.36e-13 TRUE
tib_stat <- bind_rows(tib_stat, tmp)

PlotBordersWithConfidenceIntervals(tib_damid, 
                                   c("PT_0h", "CTCFNQ_0h", "CTCFNQ_6h", 
                                     "CTCFNQ_24h", "CTCFNQ_96h"),
                                   group = "border_ctcf_strand",
                                   ylim = c(-0.80, 0.80))

## # A tibble: 8 × 5
##   group     key          pvalue    padj sign 
##   <fct>     <fct>         <dbl>   <dbl> <lgl>
## 1 nonCTCF   CTCFNQ_6h  0.0117   0.0703  FALSE
## 2 nonCTCF   CTCFNQ_24h 0.00155  0.0108  TRUE 
## 3 outwards  CTCFNQ_6h  0.129    0.499   FALSE
## 4 outwards  CTCFNQ_24h 0.125    0.499   FALSE
## 5 inwards   CTCFNQ_6h  0.0504   0.252   FALSE
## 6 inwards   CTCFNQ_24h 0.000493 0.00394 TRUE 
## 7 ambiguous CTCFNQ_6h  0.365    0.695   FALSE
## 8 ambiguous CTCFNQ_24h 0.348    0.695   FALSE
## # A tibble: 6 × 5
##   group     key        pvalue  padj sign 
##   <chr>     <chr>       <dbl> <dbl> <lgl>
## 1 outwards  CTCFNQ_6h  0.819  1     FALSE
## 2 outwards  CTCFNQ_24h 0.970  1     FALSE
## 3 inwards   CTCFNQ_6h  0.792  1     FALSE
## 4 inwards   CTCFNQ_24h 0.175  0.875 FALSE
## 5 ambiguous CTCFNQ_6h  0.0394 0.237 FALSE
## 6 ambiguous CTCFNQ_24h 0.496  1     FALSE
## # A tibble: 6 × 3
##   group     key        pvalue
##   <chr>     <chr>       <dbl>
## 1 outwards  CTCFNQ_6h  0.819 
## 2 outwards  CTCFNQ_24h 0.970 
## 3 inwards   CTCFNQ_6h  0.792 
## 4 inwards   CTCFNQ_24h 0.175 
## 5 ambiguous CTCFNQ_6h  0.0394
## 6 ambiguous CTCFNQ_24h 0.496
tmp <- PlotBordersWithConfidenceIntervals(tib_damid, 
                                          c("PT_0h", "WAPL_0h", "WAPL_6h", 
                                            "WAPL_24h", "WAPL_96h"),
                                          group = "border_ctcf_strand",
                                          ylim = c(-1.05, 0.75))

## # A tibble: 8 × 5
##   group     key        pvalue     padj sign 
##   <fct>     <fct>       <dbl>    <dbl> <lgl>
## 1 nonCTCF   WAPL_6h  1.45e- 5 4.36e- 5 TRUE 
## 2 nonCTCF   WAPL_24h 1.23e- 7 4.93e- 7 TRUE 
## 3 outwards  WAPL_6h  6.51e- 5 1.30e- 4 TRUE 
## 4 outwards  WAPL_24h 2.40e- 4 2.40e- 4 TRUE 
## 5 inwards   WAPL_6h  4.57e-17 3.66e-16 TRUE 
## 6 inwards   WAPL_24h 5.41e-17 3.78e-16 TRUE 
## 7 ambiguous WAPL_6h  1.67e-13 1.00e-12 TRUE 
## 8 ambiguous WAPL_24h 1.83e-12 9.14e-12 TRUE 
## # A tibble: 6 × 5
##   group     key             pvalue         padj sign 
##   <chr>     <chr>            <dbl>        <dbl> <lgl>
## 1 outwards  WAPL_6h  0.0314        0.0628       FALSE
## 2 outwards  WAPL_24h 0.0849        0.0849       FALSE
## 3 inwards   WAPL_6h  0.00000000548 0.0000000329 TRUE 
## 4 inwards   WAPL_24h 0.00000138    0.00000691   TRUE 
## 5 ambiguous WAPL_6h  0.00000295    0.0000118    TRUE 
## 6 ambiguous WAPL_24h 0.000299      0.000898     TRUE
tib_stat <- bind_rows(tib_stat, tmp)

tmp <- PlotBordersWithConfidenceIntervals(tib_damid, 
                                          c("PT_0h", "CTCFWAPL_0h", "CTCFWAPL_6h", 
                                            "CTCFWAPL_24h", "CTCFWAPL_96h"),
                                          group = "border_ctcf_strand",
                                          ylim = c(-1.05, 0.75))

## # A tibble: 8 × 5
##   group     key             pvalue     padj sign 
##   <fct>     <fct>            <dbl>    <dbl> <lgl>
## 1 nonCTCF   CTCFWAPL_6h  0.0847    0.169    FALSE
## 2 nonCTCF   CTCFWAPL_24h 0.0161    0.0644   FALSE
## 3 outwards  CTCFWAPL_6h  0.0000879 0.000527 TRUE 
## 4 outwards  CTCFWAPL_24h 0.0000621 0.000464 TRUE 
## 5 inwards   CTCFWAPL_6h  0.0330    0.0989   FALSE
## 6 inwards   CTCFWAPL_24h 0.173     0.173    FALSE
## 7 ambiguous CTCFWAPL_6h  0.00210   0.0105   TRUE 
## 8 ambiguous CTCFWAPL_24h 0.0000580 0.000464 TRUE 
## # A tibble: 6 × 5
##   group     key              pvalue       padj sign 
##   <chr>     <chr>             <dbl>      <dbl> <lgl>
## 1 outwards  CTCFWAPL_6h  0.00000312 0.0000125  TRUE 
## 2 outwards  CTCFWAPL_24h 0.00000119 0.00000715 TRUE 
## 3 inwards   CTCFWAPL_6h  0.00609    0.0122     TRUE 
## 4 inwards   CTCFWAPL_24h 0.0171     0.0171     TRUE 
## 5 ambiguous CTCFWAPL_6h  0.000318   0.000954   TRUE 
## 6 ambiguous CTCFWAPL_24h 0.00000168 0.00000838 TRUE
tib_stat <- bind_rows(tib_stat, tmp)

tmp <- PlotBordersWithConfidenceIntervals(tib_damid, 
                                          c("PT_0h", "RAD21_0h", "RAD21_6h", 
                                            "RAD21_24h"),
                                          group = "border_ctcf_strand",
                                          ylim = c(-1.05, 0.75))

## # A tibble: 8 × 5
##   group     key         pvalue     padj sign 
##   <fct>     <fct>        <dbl>    <dbl> <lgl>
## 1 nonCTCF   RAD21_6h  1.29e-12 1.03e-11 TRUE 
## 2 nonCTCF   RAD21_24h 1.19e- 2 7.15e- 2 FALSE
## 3 outwards  RAD21_6h  2.02e- 1 4.05e- 1 FALSE
## 4 outwards  RAD21_24h 9.04e- 2 2.71e- 1 FALSE
## 5 inwards   RAD21_6h  4.12e- 3 2.88e- 2 TRUE 
## 6 inwards   RAD21_24h 4.99e- 2 2.16e- 1 FALSE
## 7 ambiguous RAD21_6h  3.82e- 1 4.05e- 1 FALSE
## 8 ambiguous RAD21_24h 4.31e- 2 2.16e- 1 FALSE
## # A tibble: 6 × 5
##   group     key        pvalue   padj sign 
##   <chr>     <chr>       <dbl>  <dbl> <lgl>
## 1 outwards  RAD21_6h  0.0692  0.208  FALSE
## 2 outwards  RAD21_24h 0.0133  0.0531 FALSE
## 3 inwards   RAD21_6h  0.279   0.557  FALSE
## 4 inwards   RAD21_24h 0.490   0.557  FALSE
## 5 ambiguous RAD21_6h  0.00855 0.0427 TRUE 
## 6 ambiguous RAD21_24h 0.00323 0.0194 TRUE
tib_stat <- bind_rows(tib_stat, tmp)

tib_stat %>% 
  mutate(padj = p.adjust(pvalue),
         sign = padj < 0.05) %>%
  print(n = 40) 
## # A tibble: 27 × 5
##    group     key            pvalue     padj sign 
##    <chr>     <chr>           <dbl>    <dbl> <lgl>
##  1 outwards  PT_24h       3.92e- 1 1   e+ 0 FALSE
##  2 inwards   PT_24h       3.02e- 1 1   e+ 0 FALSE
##  3 ambiguous PT_24h       5.07e- 2 3.55e- 1 FALSE
##  4 outwards  CTCFEL_6h    1.91e- 5 3.05e- 4 TRUE 
##  5 outwards  CTCFEL_24h   6.50e- 9 1.48e- 7 TRUE 
##  6 inwards   CTCFEL_6h    6.45e- 9 1.48e- 7 TRUE 
##  7 inwards   CTCFEL_24h   5.38e- 9 1.34e- 7 TRUE 
##  8 ambiguous CTCFEL_6h    2.00e- 9 5.19e- 8 TRUE 
##  9 ambiguous CTCFEL_24h   2.27e-14 6.14e-13 TRUE 
## 10 outwards  WAPL_6h      3.14e- 2 2.51e- 1 FALSE
## 11 outwards  WAPL_24h     8.49e- 2 4.24e- 1 FALSE
## 12 inwards   WAPL_6h      5.48e- 9 1.34e- 7 TRUE 
## 13 inwards   WAPL_24h     1.38e- 6 2.76e- 5 TRUE 
## 14 ambiguous WAPL_6h      2.95e- 6 5.31e- 5 TRUE 
## 15 ambiguous WAPL_24h     2.99e- 4 4.49e- 3 TRUE 
## 16 outwards  CTCFWAPL_6h  3.12e- 6 5.31e- 5 TRUE 
## 17 outwards  CTCFWAPL_24h 1.19e- 6 2.50e- 5 TRUE 
## 18 inwards   CTCFWAPL_6h  6.09e- 3 7.30e- 2 FALSE
## 19 inwards   CTCFWAPL_24h 1.71e- 2 1.54e- 1 FALSE
## 20 ambiguous CTCFWAPL_6h  3.18e- 4 4.49e- 3 TRUE 
## 21 ambiguous CTCFWAPL_24h 1.68e- 6 3.18e- 5 TRUE 
## 22 outwards  RAD21_6h     6.92e- 2 4.15e- 1 FALSE
## 23 outwards  RAD21_24h    1.33e- 2 1.33e- 1 FALSE
## 24 inwards   RAD21_6h     2.79e- 1 1   e+ 0 FALSE
## 25 inwards   RAD21_24h    4.90e- 1 1   e+ 0 FALSE
## 26 ambiguous RAD21_6h     8.55e- 3 9.40e- 2 FALSE
## 27 ambiguous RAD21_24h    3.23e- 3 4.20e- 2 TRUE
# Plot counts
PlotBordersWithConfidenceIntervals(tib_damid_lmnb1, 
                                   c("PT_0h", "CTCFEL_0h", "CTCFEL_6h", 
                                     "CTCFEL_24h", "CTCFEL_96h"),
                                   ylim = c(0, 7))

## # A tibble: 4 × 5
##   group   key          pvalue     padj sign 
##   <fct>   <fct>         <dbl>    <dbl> <lgl>
## 1 nonCTCF CTCFEL_6h  2.19e-14 6.57e-14 TRUE 
## 2 nonCTCF CTCFEL_24h 3.81e-18 1.53e-17 TRUE 
## 3 CTCF    CTCFEL_6h  7.65e- 5 1.53e- 4 TRUE 
## 4 CTCF    CTCFEL_24h 4.38e- 1 4.38e- 1 FALSE
## # A tibble: 2 × 5
##   group key          pvalue     padj sign 
##   <chr> <chr>         <dbl>    <dbl> <lgl>
## 1 CTCF  CTCFEL_6h  1.46e-14 2.92e-14 TRUE 
## 2 CTCF  CTCFEL_24h 2.69e-10 2.69e-10 TRUE
## # A tibble: 2 × 3
##   group key          pvalue
##   <chr> <chr>         <dbl>
## 1 CTCF  CTCFEL_6h  1.46e-14
## 2 CTCF  CTCFEL_24h 2.69e-10
PlotBordersWithConfidenceIntervals(tib_damid_dam, 
                                   c("PT_0h", "CTCFEL_0h", "CTCFEL_6h", 
                                     "CTCFEL_24h", "CTCFEL_96h"),
                                   ylim = c(0, 7))

## # A tibble: 4 × 5
##   group   key             pvalue       padj sign 
##   <fct>   <fct>            <dbl>      <dbl> <lgl>
## 1 nonCTCF CTCFEL_6h  0.676       0.676      FALSE
## 2 nonCTCF CTCFEL_24h 0.000299    0.000599   TRUE 
## 3 CTCF    CTCFEL_6h  0.0000380   0.000114   TRUE 
## 4 CTCF    CTCFEL_24h 0.000000520 0.00000208 TRUE 
## # A tibble: 2 × 5
##   group key          pvalue          padj sign 
##   <chr> <chr>         <dbl>         <dbl> <lgl>
## 1 CTCF  CTCFEL_6h  4.16e- 4 0.000416      TRUE 
## 2 CTCF  CTCFEL_24h 7.74e-10 0.00000000155 TRUE
## # A tibble: 2 × 3
##   group key          pvalue
##   <chr> <chr>         <dbl>
## 1 CTCF  CTCFEL_6h  4.16e- 4
## 2 CTCF  CTCFEL_24h 7.74e-10
PlotBordersWithConfidenceIntervals(tib_damid_lmnb1, 
                                   c("PT_0h", "WAPL_0h", "WAPL_6h", 
                                     "WAPL_24h", "WAPL_96h"),
                                   ylim = c(0, 7))

## # A tibble: 4 × 5
##   group   key        pvalue     padj sign 
##   <fct>   <fct>       <dbl>    <dbl> <lgl>
## 1 nonCTCF WAPL_6h  2.62e- 4 2.62e- 4 TRUE 
## 2 nonCTCF WAPL_24h 1.97e-15 3.95e-15 TRUE 
## 3 CTCF    WAPL_6h  2.95e-33 8.85e-33 TRUE 
## 4 CTCF    WAPL_24h 4.51e-53 1.80e-52 TRUE 
## # A tibble: 2 × 5
##   group key        pvalue     padj sign 
##   <chr> <chr>       <dbl>    <dbl> <lgl>
## 1 CTCF  WAPL_6h  2.01e-10 4.02e-10 TRUE 
## 2 CTCF  WAPL_24h 6.03e- 9 6.03e- 9 TRUE
## # A tibble: 2 × 3
##   group key        pvalue
##   <chr> <chr>       <dbl>
## 1 CTCF  WAPL_6h  2.01e-10
## 2 CTCF  WAPL_24h 6.03e- 9
PlotBordersWithConfidenceIntervals(tib_damid_dam, 
                                   c("PT_0h", "WAPL_0h", "WAPL_6h", 
                                     "WAPL_24h", "WAPL_96h"),
                                   ylim = c(0, 7))

## # A tibble: 4 × 5
##   group   key       pvalue   padj sign 
##   <fct>   <fct>      <dbl>  <dbl> <lgl>
## 1 nonCTCF WAPL_6h  0.730   1      FALSE
## 2 nonCTCF WAPL_24h 0.816   1      FALSE
## 3 CTCF    WAPL_6h  0.00330 0.0132 TRUE 
## 4 CTCF    WAPL_24h 0.0150  0.0450 TRUE 
## # A tibble: 2 × 5
##   group key      pvalue   padj sign 
##   <chr> <chr>     <dbl>  <dbl> <lgl>
## 1 CTCF  WAPL_6h  0.0124 0.0247 TRUE 
## 2 CTCF  WAPL_24h 0.0397 0.0397 TRUE
## # A tibble: 2 × 3
##   group key      pvalue
##   <chr> <chr>     <dbl>
## 1 CTCF  WAPL_6h  0.0124
## 2 CTCF  WAPL_24h 0.0397
PlotBordersWithConfidenceIntervals(tib_damid_lmnb1, 
                                   c("PT_0h", "CTCFWAPL_0h", "CTCFWAPL_6h", 
                                     "CTCFWAPL_24h", "CTCFWAPL_96h"),
                                   ylim = c(0, 7))

## # A tibble: 4 × 5
##   group   key            pvalue     padj sign 
##   <fct>   <fct>           <dbl>    <dbl> <lgl>
## 1 nonCTCF CTCFWAPL_6h  1.47e- 1 1.47e- 1 FALSE
## 2 nonCTCF CTCFWAPL_24h 5.19e- 3 1.04e- 2 TRUE 
## 3 CTCF    CTCFWAPL_6h  7.93e-20 2.38e-19 TRUE 
## 4 CTCF    CTCFWAPL_24h 2.25e-26 9.02e-26 TRUE 
## # A tibble: 2 × 5
##   group key            pvalue     padj sign 
##   <chr> <chr>           <dbl>    <dbl> <lgl>
## 1 CTCF  CTCFWAPL_6h  1.03e-10 1.03e-10 TRUE 
## 2 CTCF  CTCFWAPL_24h 9.31e-12 1.86e-11 TRUE
## # A tibble: 2 × 3
##   group key            pvalue
##   <chr> <chr>           <dbl>
## 1 CTCF  CTCFWAPL_6h  1.03e-10
## 2 CTCF  CTCFWAPL_24h 9.31e-12
PlotBordersWithConfidenceIntervals(tib_damid_dam, 
                                   c("PT_0h", "CTCFWAPL_0h", "CTCFWAPL_6h", 
                                     "CTCFWAPL_24h", "CTCFWAPL_96h"),
                                   ylim = c(0, 7))

## # A tibble: 4 × 5
##   group   key            pvalue    padj sign 
##   <fct>   <fct>           <dbl>   <dbl> <lgl>
## 1 nonCTCF CTCFWAPL_6h  0.0526   0.158   FALSE
## 2 nonCTCF CTCFWAPL_24h 0.159    0.318   FALSE
## 3 CTCF    CTCFWAPL_6h  0.000294 0.00118 TRUE 
## 4 CTCF    CTCFWAPL_24h 0.631    0.631   FALSE
## # A tibble: 2 × 5
##   group key          pvalue  padj sign 
##   <chr> <chr>         <dbl> <dbl> <lgl>
## 1 CTCF  CTCFWAPL_6h  0.0543 0.109 FALSE
## 2 CTCF  CTCFWAPL_24h 0.206  0.206 FALSE
## # A tibble: 2 × 3
##   group key          pvalue
##   <chr> <chr>         <dbl>
## 1 CTCF  CTCFWAPL_6h  0.0543
## 2 CTCF  CTCFWAPL_24h 0.206
PlotBordersWithConfidenceIntervals(tib_damid_lmnb1, 
                                   c("PT_0h", "RAD21_0h", "RAD21_6h", 
                                     "RAD21_24h"),
                                   ylim = c(0, 7))

## # A tibble: 4 × 5
##   group   key         pvalue     padj sign 
##   <fct>   <fct>        <dbl>    <dbl> <lgl>
## 1 nonCTCF RAD21_6h  3.79e-18 1.52e-17 TRUE 
## 2 nonCTCF RAD21_24h 5.69e- 4 1.14e- 3 TRUE 
## 3 CTCF    RAD21_6h  3.10e- 6 9.31e- 6 TRUE 
## 4 CTCF    RAD21_24h 9.48e- 2 9.48e- 2 FALSE
## # A tibble: 2 × 5
##   group key         pvalue    padj sign 
##   <chr> <chr>        <dbl>   <dbl> <lgl>
## 1 CTCF  RAD21_6h  0.0325   0.0325  TRUE 
## 2 CTCF  RAD21_24h 0.000613 0.00123 TRUE
## # A tibble: 2 × 3
##   group key         pvalue
##   <chr> <chr>        <dbl>
## 1 CTCF  RAD21_6h  0.0325  
## 2 CTCF  RAD21_24h 0.000613
PlotBordersWithConfidenceIntervals(tib_damid_dam, 
                                   c("PT_0h", "RAD21_0h", "RAD21_6h", 
                                     "RAD21_24h"),
                                   ylim = c(0, 7))

## # A tibble: 4 × 5
##   group   key        pvalue    padj sign 
##   <fct>   <fct>       <dbl>   <dbl> <lgl>
## 1 nonCTCF RAD21_6h  0.00755 0.0226  TRUE 
## 2 nonCTCF RAD21_24h 0.00149 0.00595 TRUE 
## 3 CTCF    RAD21_6h  0.0810  0.0810  FALSE
## 4 CTCF    RAD21_24h 0.0124  0.0248  TRUE 
## # A tibble: 2 × 5
##   group key       pvalue  padj sign 
##   <chr> <chr>      <dbl> <dbl> <lgl>
## 1 CTCF  RAD21_6h   0.863     1 FALSE
## 2 CTCF  RAD21_24h  0.736     1 FALSE
## # A tibble: 2 × 3
##   group key       pvalue
##   <chr> <chr>      <dbl>
## 1 CTCF  RAD21_6h   0.863
## 2 CTCF  RAD21_24h  0.736
# CTCF vs non-CTCF
PlotBordersWithConfidenceIntervals(tib_damid, filter_96h = F, 
                                   c("PT_0h", "CTCFEL_0h", "CTCFEL_6h", 
                                     "CTCFEL_24h", "CTCFEL_96h"))

## # A tibble: 6 × 5
##   group   key          pvalue     padj sign 
##   <fct>   <fct>         <dbl>    <dbl> <lgl>
## 1 nonCTCF CTCFEL_6h  1.73e- 1 1.73e- 1 FALSE
## 2 nonCTCF CTCFEL_24h 2.50e- 7 7.49e- 7 TRUE 
## 3 nonCTCF CTCFEL_96h 2.19e- 5 4.38e- 5 TRUE 
## 4 CTCF    CTCFEL_6h  5.14e-18 3.08e-17 TRUE 
## 5 CTCF    CTCFEL_24h 4.90e-15 2.45e-14 TRUE 
## 6 CTCF    CTCFEL_96h 1.67e-10 6.69e-10 TRUE 
## # A tibble: 3 × 5
##   group key          pvalue     padj sign 
##   <chr> <chr>         <dbl>    <dbl> <lgl>
## 1 CTCF  CTCFEL_6h  1.55e-15 3.09e-15 TRUE 
## 2 CTCF  CTCFEL_24h 4.58e-21 1.37e-20 TRUE 
## 3 CTCF  CTCFEL_96h 1.70e-14 1.70e-14 TRUE
## # A tibble: 3 × 3
##   group key          pvalue
##   <chr> <chr>         <dbl>
## 1 CTCF  CTCFEL_6h  1.55e-15
## 2 CTCF  CTCFEL_24h 4.58e-21
## 3 CTCF  CTCFEL_96h 1.70e-14
PlotBordersWithConfidenceIntervals(tib_damid, filter_96h = F, 
                                   c("PT_0h", "WAPL_0h", "WAPL_6h", 
                                     "WAPL_24h", "WAPL_96h"))

## # A tibble: 6 × 5
##   group   key        pvalue     padj sign 
##   <fct>   <fct>       <dbl>    <dbl> <lgl>
## 1 nonCTCF WAPL_6h  1.45e- 5 1.45e- 5 TRUE 
## 2 nonCTCF WAPL_24h 1.23e- 7 3.70e- 7 TRUE 
## 3 nonCTCF WAPL_96h 1.46e- 6 2.92e- 6 TRUE 
## 4 CTCF    WAPL_6h  3.42e-32 2.05e-31 TRUE 
## 5 CTCF    WAPL_24h 5.02e-30 2.51e-29 TRUE 
## 6 CTCF    WAPL_96h 1.93e-12 7.72e-12 TRUE 
## # A tibble: 3 × 5
##   group key        pvalue     padj sign 
##   <chr> <chr>       <dbl>    <dbl> <lgl>
## 1 CTCF  WAPL_6h  8.50e-11 2.55e-10 TRUE 
## 2 CTCF  WAPL_24h 1.79e- 7 3.58e- 7 TRUE 
## 3 CTCF  WAPL_96h 9.44e- 2 9.44e- 2 FALSE
## # A tibble: 3 × 3
##   group key        pvalue
##   <chr> <chr>       <dbl>
## 1 CTCF  WAPL_6h  8.50e-11
## 2 CTCF  WAPL_24h 1.79e- 7
## 3 CTCF  WAPL_96h 9.44e- 2
PlotBordersWithConfidenceIntervals(tib_damid, filter_96h = F, 
                                   c("PT_0h", "CTCFWAPL_0h", "CTCFWAPL_6h", 
                                     "CTCFWAPL_24h", "CTCFWAPL_96h"))

## # A tibble: 6 × 5
##   group   key               pvalue        padj sign 
##   <fct>   <fct>              <dbl>       <dbl> <lgl>
## 1 nonCTCF CTCFWAPL_6h  0.0847      0.169       FALSE
## 2 nonCTCF CTCFWAPL_24h 0.0161      0.0483      TRUE 
## 3 nonCTCF CTCFWAPL_96h 0.0881      0.169       FALSE
## 4 CTCF    CTCFWAPL_6h  0.000000228 0.00000114  TRUE 
## 5 CTCF    CTCFWAPL_24h 0.000000128 0.000000771 TRUE 
## 6 CTCF    CTCFWAPL_96h 0.000321    0.00128     TRUE 
## # A tibble: 3 × 5
##   group key                pvalue         padj sign 
##   <chr> <chr>               <dbl>        <dbl> <lgl>
## 1 CTCF  CTCFWAPL_6h  0.000000229  0.000000457  TRUE 
## 2 CTCF  CTCFWAPL_24h 0.0000000150 0.0000000450 TRUE 
## 3 CTCF  CTCFWAPL_96h 0.000109     0.000109     TRUE
## # A tibble: 3 × 3
##   group key                pvalue
##   <chr> <chr>               <dbl>
## 1 CTCF  CTCFWAPL_6h  0.000000229 
## 2 CTCF  CTCFWAPL_24h 0.0000000150
## 3 CTCF  CTCFWAPL_96h 0.000109
# With CTCF orientation
PlotBordersWithConfidenceIntervals(tib_damid, filter_96h = F, 
                                   c("PT_0h", "CTCFEL_0h", "CTCFEL_6h", 
                                     "CTCFEL_24h", "CTCFEL_96h"),
                                   group = "border_ctcf_strand",
                                   ylim = c(-0.95, 0.80))

## # A tibble: 12 × 5
##    group     key              pvalue        padj sign 
##    <fct>     <fct>             <dbl>       <dbl> <lgl>
##  1 nonCTCF   CTCFEL_6h  0.173        0.173       FALSE
##  2 nonCTCF   CTCFEL_24h 0.000000250  0.00000200  TRUE 
##  3 nonCTCF   CTCFEL_96h 0.0000219    0.000153    TRUE 
##  4 outwards  CTCFEL_6h  0.000151     0.000691    TRUE 
##  5 outwards  CTCFEL_24h 0.0000309    0.000185    TRUE 
##  6 outwards  CTCFEL_96h 0.00759      0.0177      TRUE 
##  7 inwards   CTCFEL_6h  0.0000000598 0.000000538 TRUE 
##  8 inwards   CTCFEL_24h 0.000138     0.000691    TRUE 
##  9 inwards   CTCFEL_96h 0.00591      0.0177      TRUE 
## 10 ambiguous CTCFEL_6h  0.0000000330 0.000000363 TRUE 
## 11 ambiguous CTCFEL_24h 0.0000000200 0.000000241 TRUE 
## 12 ambiguous CTCFEL_96h 0.0000000533 0.000000533 TRUE 
## # A tibble: 9 × 5
##   group     key          pvalue     padj sign 
##   <chr>     <chr>         <dbl>    <dbl> <lgl>
## 1 outwards  CTCFEL_6h  1.91e- 5 3.81e- 5 TRUE 
## 2 outwards  CTCFEL_24h 6.50e- 9 3.23e- 8 TRUE 
## 3 outwards  CTCFEL_96h 4.94e- 5 4.94e- 5 TRUE 
## 4 inwards   CTCFEL_6h  6.45e- 9 3.23e- 8 TRUE 
## 5 inwards   CTCFEL_24h 5.38e- 9 3.23e- 8 TRUE 
## 6 inwards   CTCFEL_96h 1.02e- 5 3.05e- 5 TRUE 
## 7 ambiguous CTCFEL_6h  2.00e- 9 1.40e- 8 TRUE 
## 8 ambiguous CTCFEL_24h 2.27e-14 2.05e-13 TRUE 
## 9 ambiguous CTCFEL_96h 1.42e-12 1.13e-11 TRUE
## # A tibble: 9 × 3
##   group     key          pvalue
##   <chr>     <chr>         <dbl>
## 1 outwards  CTCFEL_6h  1.91e- 5
## 2 outwards  CTCFEL_24h 6.50e- 9
## 3 outwards  CTCFEL_96h 4.94e- 5
## 4 inwards   CTCFEL_6h  6.45e- 9
## 5 inwards   CTCFEL_24h 5.38e- 9
## 6 inwards   CTCFEL_96h 1.02e- 5
## 7 ambiguous CTCFEL_6h  2.00e- 9
## 8 ambiguous CTCFEL_24h 2.27e-14
## 9 ambiguous CTCFEL_96h 1.42e-12
PlotBordersWithConfidenceIntervals(tib_damid, filter_96h = F, 
                                   c("PT_0h", "WAPL_0h", "WAPL_6h", 
                                     "WAPL_24h", "WAPL_96h"),
                                   group = "border_ctcf_strand",
                                   ylim = c(-0.95, 0.80)) 

## # A tibble: 12 × 5
##    group     key        pvalue     padj sign 
##    <fct>     <fct>       <dbl>    <dbl> <lgl>
##  1 nonCTCF   WAPL_6h  1.45e- 5 7.27e- 5 TRUE 
##  2 nonCTCF   WAPL_24h 1.23e- 7 8.63e- 7 TRUE 
##  3 nonCTCF   WAPL_96h 1.46e- 6 8.75e- 6 TRUE 
##  4 outwards  WAPL_6h  6.51e- 5 2.61e- 4 TRUE 
##  5 outwards  WAPL_24h 2.40e- 4 7.19e- 4 TRUE 
##  6 outwards  WAPL_96h 1.53e- 1 1.53e- 1 FALSE
##  7 inwards   WAPL_6h  4.57e-17 5.48e-16 TRUE 
##  8 inwards   WAPL_24h 5.41e-17 5.95e-16 TRUE 
##  9 inwards   WAPL_96h 6.61e-11 5.29e-10 TRUE 
## 10 ambiguous WAPL_6h  1.67e-13 1.67e-12 TRUE 
## 11 ambiguous WAPL_24h 1.83e-12 1.65e-11 TRUE 
## 12 ambiguous WAPL_96h 6.36e- 4 1.27e- 3 TRUE 
## # A tibble: 9 × 5
##   group     key             pvalue         padj sign 
##   <chr>     <chr>            <dbl>        <dbl> <lgl>
## 1 outwards  WAPL_6h  0.0314        0.126        FALSE
## 2 outwards  WAPL_24h 0.0849        0.255        FALSE
## 3 outwards  WAPL_96h 0.558         1            FALSE
## 4 inwards   WAPL_6h  0.00000000548 0.0000000493 TRUE 
## 5 inwards   WAPL_24h 0.00000138    0.0000111    TRUE 
## 6 inwards   WAPL_96h 0.00266       0.0133       TRUE 
## 7 ambiguous WAPL_6h  0.00000295    0.0000207    TRUE 
## 8 ambiguous WAPL_24h 0.000299      0.00180      TRUE 
## 9 ambiguous WAPL_96h 0.670         1            FALSE
## # A tibble: 9 × 3
##   group     key             pvalue
##   <chr>     <chr>            <dbl>
## 1 outwards  WAPL_6h  0.0314       
## 2 outwards  WAPL_24h 0.0849       
## 3 outwards  WAPL_96h 0.558        
## 4 inwards   WAPL_6h  0.00000000548
## 5 inwards   WAPL_24h 0.00000138   
## 6 inwards   WAPL_96h 0.00266      
## 7 ambiguous WAPL_6h  0.00000295   
## 8 ambiguous WAPL_24h 0.000299     
## 9 ambiguous WAPL_96h 0.670
PlotBordersWithConfidenceIntervals(tib_damid, filter_96h = F, 
                                   c("PT_0h", "CTCFWAPL_0h", "CTCFWAPL_6h", 
                                     "CTCFWAPL_24h", "CTCFWAPL_96h"),
                                   group = "border_ctcf_strand",
                                   ylim = c(-0.95, 0.80))

## # A tibble: 12 × 5
##    group     key             pvalue     padj sign 
##    <fct>     <fct>            <dbl>    <dbl> <lgl>
##  1 nonCTCF   CTCFWAPL_6h  0.0847    0.339    FALSE
##  2 nonCTCF   CTCFWAPL_24h 0.0161    0.0967   FALSE
##  3 nonCTCF   CTCFWAPL_96h 0.0881    0.339    FALSE
##  4 outwards  CTCFWAPL_6h  0.0000879 0.000879 TRUE 
##  5 outwards  CTCFWAPL_24h 0.0000621 0.000696 TRUE 
##  6 outwards  CTCFWAPL_96h 0.00311   0.0218   TRUE 
##  7 inwards   CTCFWAPL_6h  0.0330    0.165    FALSE
##  8 inwards   CTCFWAPL_24h 0.173     0.347    FALSE
##  9 inwards   CTCFWAPL_96h 0.992     0.992    FALSE
## 10 ambiguous CTCFWAPL_6h  0.00210   0.0168   TRUE 
## 11 ambiguous CTCFWAPL_24h 0.0000580 0.000696 TRUE 
## 12 ambiguous CTCFWAPL_96h 0.000316  0.00285  TRUE 
## # A tibble: 9 × 5
##   group     key              pvalue      padj sign 
##   <chr>     <chr>             <dbl>     <dbl> <lgl>
## 1 outwards  CTCFWAPL_6h  0.00000312 0.0000218 TRUE 
## 2 outwards  CTCFWAPL_24h 0.00000119 0.0000107 TRUE 
## 3 outwards  CTCFWAPL_96h 0.000445   0.00178   TRUE 
## 4 inwards   CTCFWAPL_6h  0.00609    0.0183    TRUE 
## 5 inwards   CTCFWAPL_24h 0.0171     0.0341    TRUE 
## 6 inwards   CTCFWAPL_96h 0.412      0.412     FALSE
## 7 ambiguous CTCFWAPL_6h  0.000318   0.00159   TRUE 
## 8 ambiguous CTCFWAPL_24h 0.00000168 0.0000134 TRUE 
## 9 ambiguous CTCFWAPL_96h 0.0000636  0.000382  TRUE
## # A tibble: 9 × 3
##   group     key              pvalue
##   <chr>     <chr>             <dbl>
## 1 outwards  CTCFWAPL_6h  0.00000312
## 2 outwards  CTCFWAPL_24h 0.00000119
## 3 outwards  CTCFWAPL_96h 0.000445  
## 4 inwards   CTCFWAPL_6h  0.00609   
## 5 inwards   CTCFWAPL_24h 0.0171    
## 6 inwards   CTCFWAPL_96h 0.412     
## 7 ambiguous CTCFWAPL_6h  0.000318  
## 8 ambiguous CTCFWAPL_24h 0.00000168
## 9 ambiguous CTCFWAPL_96h 0.0000636

As mentioned, also plot borders with multiple CTCF sites.

# CTCF + number vs non-CTCF
tib_stat <- tibble()

PlotBordersWithConfidenceIntervals(tib_damid, 
                                   c("PT_0h"),
                                   group = "border_ctcf_n",
                                   ylim = c(-1.25, 0.90),
                                   keep_PT = T)

## NULL
tmp <- PlotBordersWithConfidenceIntervals(tib_damid,  
                                          c("PT_0h", "PT_24h"),
                                          group = "border_ctcf_n",
                                          ylim = c(-1.25, 0.90),
                                          keep_PT = T)

## # A tibble: 4 × 5
##   group   key    pvalue  padj sign 
##   <fct>   <fct>   <dbl> <dbl> <lgl>
## 1 nonCTCF PT_24h  0.884 0.950 FALSE
## 2 1       PT_24h  0.349 0.950 FALSE
## 3 2       PT_24h  0.117 0.470 FALSE
## 4 >=3     PT_24h  0.317 0.950 FALSE
## # A tibble: 3 × 5
##   group key    pvalue  padj sign 
##   <chr> <chr>   <dbl> <dbl> <lgl>
## 1 1     PT_24h  0.530 0.530 FALSE
## 2 2     PT_24h  0.158 0.473 FALSE
## 3 >=3   PT_24h  0.262 0.523 FALSE
tib_stat <- bind_rows(tib_stat, tmp)

tmp <- PlotBordersWithConfidenceIntervals(tib_damid, 
                                          c("PT_0h", "CTCFEL_0h", "CTCFEL_6h", 
                                            "CTCFEL_24h", "CTCFEL_96h"),
                                          group = "border_ctcf_n",
                                          ylim = c(-1.25, 0.90))

## # A tibble: 8 × 5
##   group   key               pvalue         padj sign 
##   <fct>   <fct>              <dbl>        <dbl> <lgl>
## 1 nonCTCF CTCFEL_6h  0.173         0.173        FALSE
## 2 nonCTCF CTCFEL_24h 0.000000250   0.00000125   TRUE 
## 3 1       CTCFEL_6h  0.0000000310  0.000000186  TRUE 
## 4 1       CTCFEL_24h 0.00000114    0.00000456   TRUE 
## 5 2       CTCFEL_6h  0.00000000948 0.0000000758 TRUE 
## 6 2       CTCFEL_24h 0.0000000182  0.000000128  TRUE 
## 7 >=3     CTCFEL_6h  0.00000298    0.00000894   TRUE 
## 8 >=3     CTCFEL_24h 0.0000247     0.0000494    TRUE 
## # A tibble: 6 × 5
##   group key          pvalue     padj sign 
##   <chr> <chr>         <dbl>    <dbl> <lgl>
## 1 1     CTCFEL_6h  3.77e- 8 1.13e- 7 TRUE 
## 2 1     CTCFEL_24h 2.74e-12 1.37e-11 TRUE 
## 3 2     CTCFEL_6h  1.69e-11 6.76e-11 TRUE 
## 4 2     CTCFEL_24h 4.05e-14 2.43e-13 TRUE 
## 5 >=3   CTCFEL_6h  1.22e- 7 2.44e- 7 TRUE 
## 6 >=3   CTCFEL_24h 6.77e- 7 6.77e- 7 TRUE
tib_stat <- bind_rows(tib_stat, tmp)

tmp <- PlotBordersWithConfidenceIntervals(tib_damid, 
                                          c("PT_0h", "WAPL_0h", "WAPL_6h", 
                                            "WAPL_24h", "WAPL_96h"),
                                          group = "border_ctcf_n",
                                          ylim = c(-1.25, 0.90))

## # A tibble: 8 × 5
##   group   key        pvalue     padj sign 
##   <fct>   <fct>       <dbl>    <dbl> <lgl>
## 1 nonCTCF WAPL_6h  1.45e- 5 4.36e- 5 TRUE 
## 2 nonCTCF WAPL_24h 1.23e- 7 4.93e- 7 TRUE 
## 3 1       WAPL_6h  1.80e-19 1.44e-18 TRUE 
## 4 1       WAPL_24h 4.26e-19 2.98e-18 TRUE 
## 5 2       WAPL_6h  1.27e-12 7.62e-12 TRUE 
## 6 2       WAPL_24h 8.38e-11 4.19e-10 TRUE 
## 7 >=3     WAPL_6h  2.00e- 3 2.00e- 3 TRUE 
## 8 >=3     WAPL_24h 9.63e- 4 1.93e- 3 TRUE 
## # A tibble: 6 × 5
##   group key            pvalue         padj sign 
##   <chr> <chr>           <dbl>        <dbl> <lgl>
## 1 1     WAPL_6h  0.00000124   0.00000620   TRUE 
## 2 1     WAPL_24h 0.0000190    0.0000760    TRUE 
## 3 2     WAPL_6h  0.0000000105 0.0000000630 TRUE 
## 4 2     WAPL_24h 0.0000486    0.000146     TRUE 
## 5 >=3   WAPL_6h  0.0280       0.0559       FALSE
## 6 >=3   WAPL_24h 0.197        0.197        FALSE
tib_stat <- bind_rows(tib_stat, tmp)

tmp <- PlotBordersWithConfidenceIntervals(tib_damid, 
                                          c("PT_0h", "CTCFWAPL_0h", "CTCFWAPL_6h", 
                                            "CTCFWAPL_24h", "CTCFWAPL_96h"),
                                          group = "border_ctcf_n",
                                          ylim = c(-1.25, 0.90))

## # A tibble: 8 × 5
##   group   key             pvalue     padj sign 
##   <fct>   <fct>            <dbl>    <dbl> <lgl>
## 1 nonCTCF CTCFWAPL_6h  0.0847    0.0981   FALSE
## 2 nonCTCF CTCFWAPL_24h 0.0161    0.0483   TRUE 
## 3 1       CTCFWAPL_6h  0.0000961 0.000769 TRUE 
## 4 1       CTCFWAPL_24h 0.000260  0.00182  TRUE 
## 5 2       CTCFWAPL_6h  0.00270   0.0135   TRUE 
## 6 2       CTCFWAPL_24h 0.000811  0.00487  TRUE 
## 7 >=3     CTCFWAPL_6h  0.0491    0.0981   FALSE
## 8 >=3     CTCFWAPL_24h 0.00434   0.0173   TRUE 
## # A tibble: 6 × 5
##   group key             pvalue      padj sign 
##   <chr> <chr>            <dbl>     <dbl> <lgl>
## 1 1     CTCFWAPL_6h  0.0000167 0.0000835 TRUE 
## 2 1     CTCFWAPL_24h 0.0000125 0.0000753 TRUE 
## 3 2     CTCFWAPL_6h  0.000506  0.00152   TRUE 
## 4 2     CTCFWAPL_24h 0.0000337 0.000135  TRUE 
## 5 >=3   CTCFWAPL_6h  0.0295    0.0295    TRUE 
## 6 >=3   CTCFWAPL_24h 0.00178   0.00356   TRUE
tib_stat <- bind_rows(tib_stat, tmp)

tmp <- PlotBordersWithConfidenceIntervals(tib_damid, 
                                          c("PT_0h", "RAD21_0h", "RAD21_6h", 
                                            "RAD21_24h"),
                                          group = "border_ctcf_n",
                                          ylim = c(-1.25, 0.90))

## # A tibble: 8 × 5
##   group   key         pvalue     padj sign 
##   <fct>   <fct>        <dbl>    <dbl> <lgl>
## 1 nonCTCF RAD21_6h  1.29e-12 1.03e-11 TRUE 
## 2 nonCTCF RAD21_24h 1.19e- 2 7.15e- 2 FALSE
## 3 1       RAD21_6h  6.52e- 3 4.56e- 2 TRUE 
## 4 1       RAD21_24h 8.56e- 1 1   e+ 0 FALSE
## 5 2       RAD21_6h  3.14e- 1 1   e+ 0 FALSE
## 6 2       RAD21_24h 5.11e- 1 1   e+ 0 FALSE
## 7 >=3     RAD21_6h  4.06e- 1 1   e+ 0 FALSE
## 8 >=3     RAD21_24h 6.43e- 1 1   e+ 0 FALSE
## # A tibble: 6 × 5
##   group key       pvalue  padj sign 
##   <chr> <chr>      <dbl> <dbl> <lgl>
## 1 1     RAD21_6h  0.0286 0.172 FALSE
## 2 1     RAD21_24h 0.121  0.375 FALSE
## 3 2     RAD21_6h  0.0534 0.267 FALSE
## 4 2     RAD21_24h 0.0937 0.375 FALSE
## 5 >=3   RAD21_6h  0.366  0.571 FALSE
## 6 >=3   RAD21_24h 0.285  0.571 FALSE
tib_stat <- bind_rows(tib_stat, tmp)

tib_stat %>% 
  mutate(padj = p.adjust(pvalue),
         sign = padj < 0.05) %>%
  print(n = 40)
## # A tibble: 27 × 5
##    group key            pvalue     padj sign 
##    <chr> <chr>           <dbl>    <dbl> <lgl>
##  1 1     PT_24h       5.30e- 1 1   e+ 0 FALSE
##  2 2     PT_24h       1.58e- 1 9.45e- 1 FALSE
##  3 >=3   PT_24h       2.62e- 1 1   e+ 0 FALSE
##  4 1     CTCFEL_6h    3.77e- 8 8.67e- 7 TRUE 
##  5 1     CTCFEL_24h   2.74e-12 7.13e-11 TRUE 
##  6 2     CTCFEL_6h    1.69e-11 4.22e-10 TRUE 
##  7 2     CTCFEL_24h   4.05e-14 1.09e-12 TRUE 
##  8 >=3   CTCFEL_6h    1.22e- 7 2.69e- 6 TRUE 
##  9 >=3   CTCFEL_24h   6.77e- 7 1.42e- 5 TRUE 
## 10 1     WAPL_6h      1.24e- 6 2.48e- 5 TRUE 
## 11 1     WAPL_24h     1.90e- 5 3.23e- 4 TRUE 
## 12 2     WAPL_6h      1.05e- 8 2.52e- 7 TRUE 
## 13 2     WAPL_24h     4.86e- 5 7.29e- 4 TRUE 
## 14 >=3   WAPL_6h      2.80e- 2 3.36e- 1 FALSE
## 15 >=3   WAPL_24h     1.97e- 1 9.85e- 1 FALSE
## 16 1     CTCFWAPL_6h  1.67e- 5 3.01e- 4 TRUE 
## 17 1     CTCFWAPL_24h 1.25e- 5 2.38e- 4 TRUE 
## 18 2     CTCFWAPL_6h  5.06e- 4 7.08e- 3 TRUE 
## 19 2     CTCFWAPL_24h 3.37e- 5 5.39e- 4 TRUE 
## 20 >=3   CTCFWAPL_6h  2.95e- 2 3.36e- 1 FALSE
## 21 >=3   CTCFWAPL_24h 1.78e- 3 2.31e- 2 TRUE 
## 22 1     RAD21_6h     2.86e- 2 3.36e- 1 FALSE
## 23 1     RAD21_24h    1.21e- 1 8.46e- 1 FALSE
## 24 2     RAD21_6h     5.34e- 2 4.80e- 1 FALSE
## 25 2     RAD21_24h    9.37e- 2 7.50e- 1 FALSE
## 26 >=3   RAD21_6h     3.66e- 1 1   e+ 0 FALSE
## 27 >=3   RAD21_24h    2.85e- 1 1   e+ 0 FALSE

Good.

4. Save data

saveRDS(metadata.damid, 
        file.path(output.dir, "metadata_damid.rds"))
saveRDS(damid, 
        file.path(output.dir, "damid.rds"))
saveRDS(bin.size, 
        file.path(output.dir, "bin_size.rds"))

# Also, as tsv file
tib_damid <- as_tibble(damid) %>%
  dplyr::select(-width, -strand) %>%
  dplyr::select(-contains("CTCF_"), -contains("CTCFNQ_"))
print(names(tib_damid))
##  [1] "seqnames"     "start"        "end"          "PT_0h"        "PT_24h"      
##  [6] "CTCFEL_0h"    "CTCFEL_6h"    "CTCFEL_24h"   "CTCFEL_96h"   "RAD21_0h"    
## [11] "RAD21_6h"     "RAD21_24h"    "WAPL_0h"      "WAPL_6h"      "WAPL_24h"    
## [16] "WAPL_96h"     "CTCFWAPL_0h"  "CTCFWAPL_6h"  "CTCFWAPL_24h" "CTCFWAPL_96h"
tib_damid <- tib_damid  %>%
  rename_all(function(x) str_replace(x, "CTCFEL", "CTCF")) 
print(names(tib_damid))
##  [1] "seqnames"     "start"        "end"          "PT_0h"        "PT_24h"      
##  [6] "CTCF_0h"      "CTCF_6h"      "CTCF_24h"     "CTCF_96h"     "RAD21_0h"    
## [11] "RAD21_6h"     "RAD21_24h"    "WAPL_0h"      "WAPL_6h"      "WAPL_24h"    
## [16] "WAPL_96h"     "CTCFWAPL_0h"  "CTCFWAPL_6h"  "CTCFWAPL_24h" "CTCFWAPL_96h"
write_tsv(tib_damid,
          file.path(output.dir, "damid_average_replicates.tsv"))

# Also, for replicates individually
damid_replicates <- as_tibble(damid_replicates) %>%
  dplyr::select(-width, -strand, 
                -contains("NQ"))
print(names(damid_replicates))
##  [1] "seqnames"        "start"           "end"             "PT_0h_r8"       
##  [5] "PT_0h_r9"        "PT_24h_r8"       "PT_24h_r9"       "CTCFEL_0h_r1"   
##  [9] "CTCFEL_0h_r4"    "CTCFEL_6h_r1"    "CTCFEL_6h_r4"    "CTCFEL_24h_r1"  
## [13] "CTCFEL_24h_r4"   "CTCFEL_96h_r1"   "CTCFEL_96h_r4"   "WAPL_0h_r1"     
## [17] "WAPL_0h_r2"      "WAPL_0h_r5"      "WAPL_6h_r1"      "WAPL_6h_r2"     
## [21] "WAPL_6h_r5"      "WAPL_24h_r1"     "WAPL_24h_r2"     "WAPL_24h_r5"    
## [25] "WAPL_96h_r1"     "WAPL_96h_r2"     "WAPL_96h_r5"     "CTCFWAPL_0h_r1" 
## [29] "CTCFWAPL_0h_r2"  "CTCFWAPL_0h_r6"  "CTCFWAPL_0h_r7"  "CTCFWAPL_6h_r1" 
## [33] "CTCFWAPL_6h_r2"  "CTCFWAPL_6h_r6"  "CTCFWAPL_6h_r7"  "CTCFWAPL_24h_r1"
## [37] "CTCFWAPL_24h_r6" "CTCFWAPL_24h_r7" "CTCFWAPL_96h_r1" "CTCFWAPL_96h_r2"
## [41] "CTCFWAPL_96h_r6" "RAD21_0h_r4"     "RAD21_0h_r5"     "RAD21_6h_r4"    
## [45] "RAD21_6h_r5"     "RAD21_24h_r4"    "RAD21_24h_r5"
damid_replicates <- damid_replicates %>%
  rename_all(~ c("seqnames", "start", "end",
                 "PT_0h_r1", "PT_0h_r2",
                 "PT_24h_r1", "PT_24h_r2",
                 "CTCF_0h_r1", "CTCF_0h_r2",
                 "CTCF_6h_r1", "CTCF_6h_r2",
                 "CTCF_24h_r1", "CTCF_24h_r2",
                 "CTCF_96h_r1", "CTCF_96h_r2",
                 "WAPL_0h_r1", "WAPL_0h_r2", "WAPL_0h_r3",
                 "WAPL_6h_r1", "WAPL_6h_r2", "WAPL_6h_r3",
                 "WAPL_24h_r1", "WAPL_24h_r2", "WAPL_24h_r3",
                 "WAPL_96h_r1", "WAPL_96h_r2", "WAPL_96h_r3",
                 "CTCFWAPL_0h_r1", "CTCFWAPL_0h_r2", 
                 "CTCFWAPL_0h_r3", "CTCFWAPL_0h_r4",
                 "CTCFWAPL_6h_r1", "CTCFWAPL_6h_r2", 
                 "CTCFWAPL_6h_r3", "CTCFWAPL_6h_r4",
                 "CTCFWAPL_24h_r1", 
                 "CTCFWAPL_24h_r3", "CTCFWAPL_24h_r4",
                 "CTCFWAPL_96h_r1", "CTCFWAPL_96h_r2", 
                 "CTCFWAPL_96h_r3",
                 "RAD21_0h_r1", "RAD21_0h_r2",
                 "RAD21_6h_r1", "RAD21_6h_r2",
                 "RAD21_24h_r1", "RAD21_24h_r2"))
print(names(damid_replicates))
##  [1] "seqnames"        "start"           "end"             "PT_0h_r1"       
##  [5] "PT_0h_r2"        "PT_24h_r1"       "PT_24h_r2"       "CTCF_0h_r1"     
##  [9] "CTCF_0h_r2"      "CTCF_6h_r1"      "CTCF_6h_r2"      "CTCF_24h_r1"    
## [13] "CTCF_24h_r2"     "CTCF_96h_r1"     "CTCF_96h_r2"     "WAPL_0h_r1"     
## [17] "WAPL_0h_r2"      "WAPL_0h_r3"      "WAPL_6h_r1"      "WAPL_6h_r2"     
## [21] "WAPL_6h_r3"      "WAPL_24h_r1"     "WAPL_24h_r2"     "WAPL_24h_r3"    
## [25] "WAPL_96h_r1"     "WAPL_96h_r2"     "WAPL_96h_r3"     "CTCFWAPL_0h_r1" 
## [29] "CTCFWAPL_0h_r2"  "CTCFWAPL_0h_r3"  "CTCFWAPL_0h_r4"  "CTCFWAPL_6h_r1" 
## [33] "CTCFWAPL_6h_r2"  "CTCFWAPL_6h_r3"  "CTCFWAPL_6h_r4"  "CTCFWAPL_24h_r1"
## [37] "CTCFWAPL_24h_r3" "CTCFWAPL_24h_r4" "CTCFWAPL_96h_r1" "CTCFWAPL_96h_r2"
## [41] "CTCFWAPL_96h_r3" "RAD21_0h_r1"     "RAD21_0h_r2"     "RAD21_6h_r1"    
## [45] "RAD21_6h_r2"     "RAD21_24h_r1"    "RAD21_24h_r2"
write_tsv(damid_replicates,
          file.path(output.dir, "damid_individual_replicates.tsv"))

Conclusion

CTCF depletion and the other depletion do not have very strong effects on LAD border positioning. I can see effects outside the borders overlapping with the CTCF site.

Also, this document veries reproducibility between replicate experiments and PCA plots summarize the global trends.

SessionInfo

sessionInfo()
## R version 4.0.5 (2021-03-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.3 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/liblapack.so.3
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] knitr_1.33           corrr_0.4.3          broom_0.7.9         
##  [4] ggbeeswarm_0.6.0     M3C_1.12.0           yaml_2.2.1          
##  [7] caTools_1.18.2       rtracklayer_1.50.0   GenomicRanges_1.42.0
## [10] GenomeInfoDb_1.26.7  IRanges_2.24.1       S4Vectors_0.28.1    
## [13] BiocGenerics_0.36.1  forcats_0.5.1        stringr_1.4.0       
## [16] dplyr_1.0.7          purrr_0.3.4          readr_2.0.0         
## [19] tidyr_1.1.3          tibble_3.1.6         ggplot2_3.3.5       
## [22] tidyverse_1.3.1     
## 
## loaded via a namespace (and not attached):
##  [1] Rtsne_0.15                  colorspace_2.0-2           
##  [3] ellipsis_0.3.2              corpcor_1.6.9              
##  [5] XVector_0.30.0              fs_1.5.0                   
##  [7] rstudioapi_0.13             farver_2.1.0               
##  [9] bit64_4.0.5                 RSpectra_0.16-0            
## [11] fansi_0.5.0                 lubridate_1.7.10           
## [13] xml2_1.3.2                  codetools_0.2-18           
## [15] doParallel_1.0.16           jsonlite_1.7.2             
## [17] Rsamtools_2.6.0             umap_0.2.7.0               
## [19] cluster_2.1.2               dbplyr_2.1.1               
## [21] png_0.1-7                   compiler_4.0.5             
## [23] httr_1.4.2                  backports_1.2.1            
## [25] assertthat_0.2.1            Matrix_1.3-4               
## [27] cli_3.1.0                   htmltools_0.5.1.1          
## [29] tools_4.0.5                 gtable_0.3.0               
## [31] glue_1.5.0                  GenomeInfoDbData_1.2.4     
## [33] Rcpp_1.0.7                  Biobase_2.50.0             
## [35] cellranger_1.1.0            jquerylib_0.1.4            
## [37] vctrs_0.3.8                 Biostrings_2.58.0          
## [39] iterators_1.0.13            xfun_0.24                  
## [41] rvest_1.0.1                 lifecycle_1.0.1            
## [43] XML_3.99-0.6                zlibbioc_1.36.0            
## [45] scales_1.1.1                vroom_1.5.3                
## [47] hms_1.1.0                   doSNOW_1.0.19              
## [49] MatrixGenerics_1.2.1        SummarizedExperiment_1.20.0
## [51] RColorBrewer_1.1-2          reticulate_1.20            
## [53] sass_0.4.0                  stringi_1.7.3              
## [55] highr_0.9                   foreach_1.5.1              
## [57] BiocParallel_1.24.1         rlang_0.4.12               
## [59] pkgconfig_2.0.3             matrixStats_0.60.0         
## [61] bitops_1.0-7                evaluate_0.14              
## [63] lattice_0.20-44             GenomicAlignments_1.26.0   
## [65] labeling_0.4.2              bit_4.0.4                  
## [67] tidyselect_1.1.1            magrittr_2.0.1             
## [69] R6_2.5.1                    snow_0.4-3                 
## [71] generics_0.1.0              DelayedArray_0.16.3        
## [73] DBI_1.1.1                   pillar_1.6.4               
## [75] haven_2.4.1                 withr_2.4.2                
## [77] RCurl_1.98-1.3              modelr_0.1.8               
## [79] crayon_1.4.2                utf8_1.2.2                 
## [81] tzdb_0.1.2                  rmarkdown_2.11             
## [83] grid_4.0.5                  readxl_1.3.1               
## [85] matrixcalc_1.0-5            reprex_2.0.0               
## [87] digest_0.6.28               openssl_1.4.4              
## [89] munsell_0.5.0               beeswarm_0.4.0             
## [91] vipor_0.4.5                 bslib_0.2.5.1              
## [93] askpass_1.1